Sunday, 22 April 2012

WavFile.h


////////////////////////////////////////////////////////////////////////////////
///
/// Classes for easy reading & writing of WAV sound files.
///
/// For big-endian CPUs, define _BIG_ENDIAN_ at compile time so that the WAV
/// files are parsed correctly on such processors.
///
/// Admittedly, more complete WAV reader routines may exist in the public domain, but
/// the reason for 'yet another' one is that those generic WAV reader libraries are
/// exhaustingly large and cumbersome! Wanted to have something simpler here, i.e.
/// something that's not already larger than the rest of the SoundTouch/SoundStretch program...
///
/// Author        : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed  : $Date: 2006/02/05 16:44:06 $
// File revision : $Revision: 1.7 $
//
// $Id: WavFile.h,v 1.7 2006/02/05 16:44:06 Olli Exp $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
//  SoundTouch audio processing library
//  Copyright (c) Olli Parviainen
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
////////////////////////////////////////////////////////////////////////////////

#ifndef WAVFILE_H
#define WAVFILE_H

#include <stdio.h>

// Shorthand for 'unsigned int', used by the WAV structures and class
// interfaces declared below.
// NOTE(review): '#ifndef' only tests for a preprocessor macro called 'uint';
// it cannot detect an existing typedef of that name, so the guard only has an
// effect when some other header provides 'uint' as a macro.
#ifndef uint
typedef unsigned int uint;
#endif        


/// Leading 'RIFF' section of a WAV file header.
struct WavRiff
{
    char riff_char[4];   ///< Section tag, expected to hold "RIFF".
    int  package_len;    ///< Package length field stored after the tag.
    char wave[4];        ///< Form type, expected to hold "WAVE".
};

/// 'fmt ' section of a WAV file header, describing the sample format.
struct WavFormat
{
    char  fmt[4];            ///< Section tag, expected to hold "fmt ".
    int   format_len;        ///< Length of the format data that follows this field.
    short fixed;             ///< Format tag; the reader accepts only the value 1.
    short channel_number;    ///< Number of audio channels.
    int   sample_rate;       ///< Sample rate.
    int   byte_rate;         ///< Bytes per second (byte_per_sample * sample_rate).
    short byte_per_sample;   ///< Bytes per sample over all channels.
    short bits_per_sample;   ///< Bits per single-channel sample.
};

/// WAV audio file 'data' section header
typedef struct
{
    char  data_field[4];
    uint  data_len;
} WavData;


/// WAV audio file header
typedef struct
{
    WavRiff   riff;
    WavFormat format;
    WavData   data;
} WavHeader;


/// Class for reading WAV audio files.
class WavInFile
{
private:
    /// File pointer.
    FILE *fptr;

    /// Counter of how many bytes of sample data have been read from the file.
    uint dataRead;

    /// WAV header information
    WavHeader header;

    /// Read WAV file headers.
    /// \return zero if all ok, nonzero if file format is invalid.
    int readWavHeaders();

    /// Checks WAV file header tags.
    /// \return zero if all ok, nonzero if file format is invalid.
    int checkCharTags();

    /// Reads a single WAV file header block.
    /// \return zero if all ok, nonzero if file format is invalid.
    int readHeaderBlock();

    /// Reads WAV file 'riff' block
    int readRIFFBlock();

public:
    /// Constructor: Opens the given WAV file. If the file can't be opened,
    /// throws 'runtime_error' exception.
    WavInFile(const char *filename);

    /// Destructor: Closes the file.
    ~WavInFile();

    /// Close the file. Notice that file is automatically closed also when the
    /// class instance is deleted.
    void close();

    /// Rewind to beginning of the file
    void rewind();

    /// Get sample rate.
    uint getSampleRate() const;

    /// Get number of bits per sample, i.e. 8 or 16.
    uint getNumBits() const;

    /// Get sample data size in bytes. Ahem, this should return same information as
    /// 'getBytesPerSample'...
    uint getDataSizeInBytes() const;

    /// Get total number of samples in file.
    uint getNumSamples() const;

    /// Get number of bytes per audio sample (e.g. 16bit stereo = 4 bytes/sample)
    uint getBytesPerSample() const;
 
    /// Get number of audio channels in the file (1=mono, 2=stereo)
    uint getNumChannels() const;

    /// Get the audio file length in milliseconds
    uint getLengthMS() const;

    /// Reads audio samples from the WAV file. This routine works only for 8 bit samples.
    /// Reads given number of elements from the file or if end-of-file reached, as many
    /// elements as are left in the file.
    ///
    /// \return Number of 8-bit integers read from the file.
    int read(char *buffer, int maxElems);

    /// Reads audio samples from the WAV file to 16 bit integer format. Reads given number
    /// of elements from the file or if end-of-file reached, as many elements as are
    /// left in the file.
    ///
    /// \return Number of 16-bit integers read from the file.
    int read(short *buffer,     ///< Pointer to buffer where to read data.
             int maxElems       ///< Size of 'buffer' array (number of array elements).
             );

    /// Reads audio samples from the WAV file to floating point format, converting
    /// sample values to range [-1,1[. Reads given number of elements from the file
    /// or if end-of-file reached, as many elements as are left in the file.
    ///
    /// \return Number of elements read from the file.
    int read(float *buffer,     ///< Pointer to buffer where to read data.
             int maxElems       ///< Size of 'buffer' array (number of array elements).
             );

    /// Check end-of-file.
    ///
    /// \return Nonzero if end-of-file reached.
    int eof() const;
};



/// Class for writing WAV audio files.
class WavOutFile
{
private:
    /// Pointer to the WAV file
    FILE *fptr;

    /// WAV file header data.
    WavHeader header;

    /// Counter of how many bytes have been written to the file so far.
    int bytesWritten;

    /// Fills in WAV file header information.
    void fillInHeader(const uint sampleRate, const uint bits, const uint channels);

    /// Finishes the WAV file header by supplementing information of amount of
    /// data written to file etc
    void finishHeader();

    /// Writes the WAV file header.
    void writeHeader();

public:
    /// Constructor: Creates a new WAV file. Throws a 'runtime_error' exception
    /// if file creation fails.
    WavOutFile(const char *fileName,    ///< Filename
               int sampleRate,          ///< Sample rate (e.g. 44100 etc)
               int bits,                ///< Bits per sample (8 or 16 bits)
               int channels             ///< Number of channels (1=mono, 2=stereo)
               );

    /// Destructor: Finalizes & closes the WAV file.
    ~WavOutFile();

    /// Write data to WAV file. This function works only with 8bit samples.
    /// Throws a 'runtime_error' exception if writing to file fails.
    void write(const char *buffer,     ///< Pointer to sample data buffer.
               int numElems             ///< How many array items are to be written to file.
               );

    /// Write data to WAV file. Throws a 'runtime_error' exception if writing to
    /// file fails.
    void write(const short *buffer,     ///< Pointer to sample data buffer.
               int numElems             ///< How many array items are to be written to file.
               );

    /// Write data to WAV file in floating point format, saturating sample values to range
    /// [-1..+1[. Throws a 'runtime_error' exception if writing to file fails.
    void write(const float *buffer,     ///< Pointer to sample data buffer.
               int numElems             ///< How many array items are to be written to file.
               );

    /// Finalize & close the WAV file. Automatically supplements the WAV file header
    /// information according to written data etc.
    ///
    /// Notice that file is automatically closed also when the class instance is deleted.
    void close();
};

#endif

Easy reading & writing of WAV sound files.


////////////////////////////////////////////////////////////////////////////////
///
/// Classes for easy reading & writing of WAV sound files.
///
/// For big-endian CPU, define _BIG_ENDIAN_ during compile-time to correctly
/// parse the WAV files with such processors.
///
/// Admittedly, more complete WAV reader routines may exist in the public domain,
/// but the reason for 'yet another' one is that those generic WAV reader
/// libraries are exhaustingly large and cumbersome! Wanted to have something
/// simpler here, i.e. something that's not already larger than the rest of the
/// SoundTouch/SoundStretch program...
///
/// Author        : Copyright (c) Olli Parviainen
/// Author e-mail : oparviai 'at' iki.fi
/// SoundTouch WWW: http://www.surina.net/soundtouch
///
////////////////////////////////////////////////////////////////////////////////
//
// Last changed  : $Date: 2006/02/05 16:44:06 $
// File revision : $Revision: 1.15 $
//
// $Id: WavFile.cpp,v 1.15 2006/02/05 16:44:06 Olli Exp $
//
////////////////////////////////////////////////////////////////////////////////
//
// License :
//
//  SoundTouch audio processing library
//  Copyright (c) Olli Parviainen
//
//  This library is free software; you can redistribute it and/or
//  modify it under the terms of the GNU Lesser General Public
//  License as published by the Free Software Foundation; either
//  version 2.1 of the License, or (at your option) any later version.
//
//  This library is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//  Lesser General Public License for more details.
//
//  You should have received a copy of the GNU Lesser General Public
//  License along with this library; if not, write to the Free Software
//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
////////////////////////////////////////////////////////////////////////////////

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>

#include <stdexcept>
#include <string>
#include <vector>

#include "WavFile.h"

using namespace std;

// Four-character tags identifying the WAV file sections. The arrays carry a
// terminating NUL so that they can also be compared with strcmp().
static const char riffStr[] = "RIFF";
static const char waveStr[] = "WAVE";
static const char fmtStr[]  = "fmt ";
static const char dataStr[] = "data";


//////////////////////////////////////////////////////////////////////////////
//
// Helper functions for swapping byte order to correctly read/write WAV files
// with big-endian CPU's: Define compile-time definition _BIG_ENDIAN_ to
// turn-on the conversion if it appears necessary.
//
// For example, Intel x86 is little-endian and doesn't require conversion,
// while PowerPC of Mac's and many other RISC cpu's are big-endian.

// Where the toolchain provides BYTE_ORDER (e.g. gcc), switch the byte-swapping
// on automatically for big-endian targets instead of relying on a manual
// _BIG_ENDIAN_ definition.
#if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
    #define _BIG_ENDIAN_
#endif
   
// Byte-order helpers. When _BIG_ENDIAN_ is defined they reverse the byte
// order of the given values; otherwise (little-endian CPU, WAV file is ok as
// such) they do nothing.

// Swaps the byte order of a 32bit integer in place.
static inline void _swap32(unsigned int &dwData)
{
#ifdef _BIG_ENDIAN_
    const unsigned int lowest  = (dwData >> 24) & 0x000000FF;
    const unsigned int lower   = (dwData >> 8)  & 0x0000FF00;
    const unsigned int higher  = (dwData << 8)  & 0x00FF0000;
    const unsigned int highest = (dwData << 24) & 0xFF000000;

    dwData = lowest | lower | higher | highest;
#else
    (void)dwData;   // nothing to do
#endif
}

// Swaps the byte order of a 16bit integer in place.
static inline void _swap16(unsigned short &wData)
{
#ifdef _BIG_ENDIAN_
    wData = ((wData >> 8) & 0x00FF) | ((wData << 8) & 0xFF00);
#else
    (void)wData;    // nothing to do
#endif
}

// Swaps the byte order of each of the 'dwNumWords' 16bit integers in 'pData'.
static inline void _swap16Buffer(unsigned short *pData, unsigned int dwNumWords)
{
#ifdef _BIG_ENDIAN_
    for (unsigned long idx = 0; idx < dwNumWords; ++idx)
    {
        _swap16(pData[idx]);
    }
#else
    (void)pData;        // nothing to do
    (void)dwNumWords;
#endif
}


//////////////////////////////////////////////////////////////////////////////
//
// Class WavInFile
//

WavInFile::WavInFile(const char *fileName)
{
    int hdrsOk;

    // Try to open the file for reading
    fptr = fopen(fileName, "rb");
    if (fptr == NULL)
    {
        // didn't succeed
        string msg = "Error : Unable to open file \"";
        msg += fileName;
        msg += "\" for reading.";
        throw runtime_error(msg);
    }

    // Read the file headers
    hdrsOk = readWavHeaders();
    if (hdrsOk != 0)
    {
        // Something didn't match in the wav file headers
        string msg = "File \"";
        msg += fileName;
        msg += "\" is corrupt or not a WAV file";
        throw runtime_error(msg);
    }

    if (header.format.fixed != 1)
    {
        string msg = "File \"";
        msg += fileName;
        msg += "\" uses unsupported encoding.";
        throw runtime_error(msg);
    }

    dataRead = 0;
}



/// Destructor: releases the file by delegating to close().
WavInFile::~WavInFile()
{
    this->close();
}



void WavInFile::rewind()
{
    int hdrsOk;

    fseek(fptr, 0, SEEK_SET);
    hdrsOk = readWavHeaders();
    assert(hdrsOk == 0);
    dataRead = 0;
}


int WavInFile::checkCharTags()
{
    // header.format.fmt should equal to 'fmt '
    if (memcmp(fmtStr, header.format.fmt, 4) != 0) return -1;
    // header.data.data_field should equal to 'data'
    if (memcmp(dataStr, header.data.data_field, 4) != 0) return -1;

    return 0;
}


int WavInFile::read(char *buffer, int maxElems)
{
    int numBytes;
    uint afterDataRead;

    // ensure it's 8 bit format
    if (header.format.bits_per_sample != 8)
    {
        throw runtime_error("Error: WavInFile::read(char*, int) works only with 8bit samples.");
    }
    assert(sizeof(char) == 1);

    numBytes = maxElems;
    afterDataRead = dataRead + numBytes;
    if (afterDataRead > header.data.data_len)
    {
        // Don't read more samples than are marked available in header
        numBytes = header.data.data_len - dataRead;
        assert(numBytes >= 0);
    }

    numBytes = fread(buffer, 1, numBytes, fptr);
    dataRead += numBytes;

    return numBytes;
}


int WavInFile::read(short *buffer, int maxElems)
{
    unsigned int afterDataRead;
    int numBytes;
    int numElems;

    if (header.format.bits_per_sample == 8)
    {
        // 8 bit format
        char *temp = new char[maxElems];
        int i;

        numElems = read(temp, maxElems);
        // convert from 8 to 16 bit
        for (i = 0; i < numElems; i ++)
        {
            buffer[i] = temp[i] << 8;
        }
        delete[] temp;
    }
    else
    {
        // 16 bit format
        assert(header.format.bits_per_sample == 16);
        assert(sizeof(short) == 2);

        numBytes = maxElems * 2;
        afterDataRead = dataRead + numBytes;
        if (afterDataRead > header.data.data_len)
        {
            // Don't read more samples than are marked available in header
            numBytes = header.data.data_len - dataRead;
            assert(numBytes >= 0);
        }

        numBytes = fread(buffer, 1, numBytes, fptr);
        dataRead += numBytes;
        numElems = numBytes / 2;

        // 16bit samples, swap byte order if necessary
        _swap16Buffer((unsigned short *)buffer, numElems);
    }

    return numElems;
}



int WavInFile::read(float *buffer, int maxElems)
{
    short *temp = new short[maxElems];
    int num;
    int i;
    double fscale;

    num = read(temp, maxElems);

    fscale = 1.0 / 32768.0;
    // convert to floats, scale to range [-1..+1[
    for (i = 0; i < num; i ++)
    {
        buffer[i] = (float)(fscale * (double)temp[i]);
    }

    delete[] temp;

    return num;
}


int WavInFile::eof() const
{
    // return true if all data has been read or file eof has reached
    return (dataRead == header.data.data_len || feof(fptr));
}


void WavInFile::close()
{
    fclose(fptr);
    fptr = NULL;
}



// Returns 1 if the character code lies between a white space ' ' and little
// 'z' (both inclusive), 0 otherwise. Note that digits and punctuation inside
// that range are accepted as well.
static int isAlpha(char c)
{
    if (c < ' ') return 0;
    if (c > 'z') return 0;
    return 1;
}


// test if all characters are between a white space ' ' and little 'z'
static int isAlphaStr(char *str)
{
    int c;

    c = str[0];
    while (c)
    {
        if (isAlpha(c) == 0) return 0;
        str ++;
        c = str[0];
    }

    return 1;
}


int WavInFile::readRIFFBlock()
{
    fread(&(header.riff), sizeof(WavRiff), 1, fptr);

    // swap 32bit data byte order if necessary
    _swap32((unsigned int &)header.riff.package_len);

    // header.riff.riff_char should equal to 'RIFF');
    if (memcmp(riffStr, header.riff.riff_char, 4) != 0) return -1;
    // header.riff.wave should equal to 'WAVE'
    if (memcmp(waveStr, header.riff.wave, 4) != 0) return -1;

    return 0;
}




int WavInFile::readHeaderBlock()
{
    char label[5];
    string sLabel;

    // lead label string
    fread(label, 1, 4, fptr);
    label[4] = 0;

    if (isAlphaStr(label) == 0) return -1;    // not a valid label

    // Decode blocks according to their label
    if (strcmp(label, fmtStr) == 0)
    {
        int nLen, nDump;

        // 'fmt ' block
        memcpy(header.format.fmt, fmtStr, 4);

        // read length of the format field
        fread(&nLen, sizeof(int), 1, fptr);
        // swap byte order if necessary
        _swap32((unsigned int &)nLen); // int format_len;
        header.format.format_len = nLen;

        // calculate how much length differs from expected
        nDump = nLen - (sizeof(header.format) - 8);

        // if format_len is larger than expected, read only as much data as we've space for
        if (nDump > 0)
        {
            nLen = sizeof(header.format) - 8;
        }

        // read data
        fread(&(header.format.fixed), nLen, 1, fptr);

        // swap byte order if necessary
        _swap16((unsigned short &)header.format.fixed);            // short int fixed;
        _swap16((unsigned short &)header.format.channel_number);   // short int channel_number;
        _swap32((unsigned int   &)header.format.sample_rate);      // int sample_rate;
        _swap32((unsigned int   &)header.format.byte_rate);        // int byte_rate;
        _swap16((unsigned short &)header.format.byte_per_sample);  // short int byte_per_sample;
        _swap16((unsigned short &)header.format.bits_per_sample);  // short int bits_per_sample;

        // if format_len is larger than expected, skip the extra data
        if (nDump > 0)
        {
            fseek(fptr, nDump, SEEK_CUR);
        }

        return 0;
    }
    else if (strcmp(label, dataStr) == 0)
    {
        // 'data' block
        memcpy(header.data.data_field, dataStr, 4);
        fread(&(header.data.data_len), sizeof(uint), 1, fptr);

        // swap byte order if necessary
        _swap32((unsigned int &)header.data.data_len);

        return 1;
    }
    else
    {
        uint len, i;
        uint temp;
        // unknown block

        // read length
        fread(&len, sizeof(len), 1, fptr);
        // scan through the block
        for (i = 0; i < len; i ++)
        {
            fread(&temp, 1, 1, fptr);
            if (feof(fptr)) return -1;   // unexpected eof
        }
    }
    return 0;
}


int WavInFile::readWavHeaders()
{
    int res;

    memset(&header, 0, sizeof(header));

    res = readRIFFBlock();
    if (res) return 1;
    // read header blocks until data block is found
    do
    {
        // read header blocks
        res = readHeaderBlock();
        if (res < 0) return 1;  // error in file structure
    } while (res == 0);
    // check that all required tags are legal
    return checkCharTags();
}


/// Returns the channel count stored in the 'format' header.
uint WavInFile::getNumChannels() const
{
    const WavFormat &format = header.format;
    return format.channel_number;
}


/// Returns the bits-per-sample value stored in the 'format' header.
uint WavInFile::getNumBits() const
{
    const WavFormat &format = header.format;
    return format.bits_per_sample;
}


/// Returns the number of bytes one sample occupies over all channels,
/// computed as channels * bits per sample / 8.
uint WavInFile::getBytesPerSample() const
{
    const uint bitsOverAllChannels = getNumChannels() * getNumBits();
    return bitsOverAllChannels / 8;
}


/// Returns the sample rate stored in the 'format' header.
uint WavInFile::getSampleRate() const
{
    const WavFormat &format = header.format;
    return format.sample_rate;
}



/// Returns the length of the sample data in bytes, as stored in the 'data'
/// header.
uint WavInFile::getDataSizeInBytes() const
{
    const WavData &data = header.data;
    return data.data_len;
}


/// Returns the total number of samples in the file: the data length divided
/// by the bytes-per-sample header field.
///
/// Returns 0 when that header field is zero (e.g. a file whose 'fmt ' block
/// is too short to contain it), instead of dividing by zero.
uint WavInFile::getNumSamples() const
{
    if (header.format.byte_per_sample == 0) return 0;

    return header.data.data_len / header.format.byte_per_sample;
}


/// Returns the length of the audio in milliseconds, i.e.
/// floor(1000 * numSamples / sampleRate).
///
/// The value is assembled from whole seconds plus the remaining fraction, so
/// the multiplication by 1000 can't overflow for long files (the direct
/// product 1000 * numSamples exceeds 32 bits from about 4.29 million samples,
/// i.e. roughly 97 seconds at 44100 Hz). Returns 0 if the sample rate is zero.
uint WavInFile::getLengthMS() const
{
    const uint numSamples = getNumSamples();
    const uint sampleRate = getSampleRate();

    if (sampleRate == 0) return 0;     // malformed header, avoid division by zero

    const uint wholeSeconds = numSamples / sampleRate;
    const uint leftover     = numSamples % sampleRate;   // always < sampleRate

    // 'leftover * 1000' stays within range for any sample rate below
    // UINT_MAX / 1000 (about 4.29 MHz).
    return wholeSeconds * 1000 + (leftover * 1000) / sampleRate;
}


//////////////////////////////////////////////////////////////////////////////
//
// Class WavOutFile
//

/// Creates the file 'fileName' for binary writing and writes a preliminary
/// WAV header for the given format. Throws 'runtime_error' if the file can't
/// be opened.
WavOutFile::WavOutFile(const char *fileName, int sampleRate, int bits, int channels)
{
    bytesWritten = 0;

    fptr = fopen(fileName, "wb");
    if (!fptr)
    {
        string msg("Error : Unable to open file \"");
        msg.append(fileName);
        msg.append("\" for writing.");
        throw runtime_error(msg);
    }

    // the length fields are still unknown here; they get completed on close
    fillInHeader(sampleRate, bits, channels);
    writeHeader();
}



/// Destructor: finalizes and closes the file by delegating to close().
WavOutFile::~WavOutFile()
{
    this->close();
}



/// Fills the header structure for the given sample rate, bits per sample and
/// channel count. The length fields (package_len, data_len) are set to zero
/// because they aren't known yet.
void WavOutFile::fillInHeader(uint sampleRate, uint bits, uint channels)
{
    WavRiff   &riff   = header.riff;
    WavFormat &format = header.format;
    WavData   &data   = header.data;

    // 'riff' part: tags 'RIFF' and 'WAVE', package_len unknown so far
    memcpy(&(riff.riff_char), riffStr, 4);
    riff.package_len = 0;
    memcpy(&(riff.wave), waveStr, 4);

    // 'format' part: tag 'fmt ' followed by the format description
    memcpy(&(format.fmt), fmtStr, 4);
    format.format_len      = 0x10;
    format.fixed           = 1;
    format.channel_number  = (short)channels;
    format.sample_rate     = sampleRate;
    format.bits_per_sample = (short)bits;
    format.byte_per_sample = (short)(bits * channels / 8);
    format.byte_rate       = format.byte_per_sample * sampleRate;

    // 'data' part: tag 'data', data_len unknown so far
    memcpy(&(data.data_field), dataStr, 4);
    data.data_len = 0;
}


void WavOutFile::finishHeader()
{
    // supplement the file length into the header structure
    header.riff.package_len = bytesWritten + 36;
    header.data.data_len = bytesWritten;

    writeHeader();
}



void WavOutFile::writeHeader()
{
    WavHeader hdrTemp;

    // swap byte order if necessary
    hdrTemp = header;
    _swap32((unsigned int   &)hdrTemp.riff.package_len);
    _swap32((unsigned int   &)hdrTemp.format.format_len);
    _swap16((unsigned short &)hdrTemp.format.fixed);
    _swap16((unsigned short &)hdrTemp.format.channel_number);
    _swap32((unsigned int   &)hdrTemp.format.sample_rate);
    _swap32((unsigned int   &)hdrTemp.format.byte_rate);
    _swap16((unsigned short &)hdrTemp.format.byte_per_sample);
    _swap16((unsigned short &)hdrTemp.format.bits_per_sample);
    _swap32((unsigned int   &)hdrTemp.data.data_len);

    // write the supplemented header in the beginning of the file
    fseek(fptr, 0, SEEK_SET);
    fwrite(&hdrTemp, sizeof(hdrTemp), 1, fptr);
    // jump back to the end of the file
    fseek(fptr, 0, SEEK_END);
}



void WavOutFile::close()
{
    finishHeader();
    fclose(fptr);
    fptr = NULL;
}


void WavOutFile::write(const char *buffer, int numElems)
{
    int res;

    if (header.format.bits_per_sample != 8)
    {
        throw runtime_error("Error: WavOutFile::write(const char*, int) accepts only 8bit samples.");
    }
    assert(sizeof(char) == 1);

    res = fwrite(buffer, 1, numElems, fptr);
    if (res != numElems)
    {
        throw runtime_error("Error while writing to a wav file.");
    }

    bytesWritten += numElems;
}


void WavOutFile::write(const short *buffer, int numElems)
{
    int res;

    // 16 bit samples
    if (numElems < 1) return;   // nothing to do

    if (header.format.bits_per_sample == 8)
    {
        int i;
        char *temp = new char[numElems];
        // convert from 16bit format to 8bit format
        for (i = 0; i < numElems; i ++)
        {
            temp[i] = buffer[i] >> 8;
        }
        // write in 8bit format
        write(temp, numElems);
        delete[] temp;
    }
    else
    {
        // 16bit format
        unsigned short *pTemp = new unsigned short[numElems];

        assert(header.format.bits_per_sample == 16);

        // allocate temp buffer to swap byte order if necessary
        memcpy(pTemp, buffer, numElems * 2);
        _swap16Buffer(pTemp, numElems);

        res = fwrite(pTemp, 2, numElems, fptr);

        delete[] pTemp;

        if (res != numElems)
        {
            throw runtime_error("Error while writing to a wav file.");
        }
        bytesWritten += 2 * numElems;
    }
}


void WavOutFile::write(const float *buffer, int numElems)
{
    int i;
    short *temp = new short[numElems];
    int iTemp;

    // convert to 16 bit integer
    for (i = 0; i < numElems; i ++)
    {
        // convert to integer
        iTemp = (int)(32768.0f * buffer[i]);

        // saturate
        if (iTemp < -32768) iTemp = -32768;
        if (iTemp > 32767)  iTemp = 32767;
        temp[i] = (short)iTemp;
    }

    write(temp, numElems);

    delete[] temp;
}

Simple SOLA algorithm Main.cpp


/////////////////////////////////////////////////////////////////////
//
// Simple SOLA algorithm example. The example reads a .wav sound
// file with mono-16bit-44100Hz sample format, process it with SOLA
// and writes output into another .wav file.
//
// Copyright (c) Olli Parviainen 2006 <oparviai@iki.fi>
//
/////////////////////////////////////////////////////////////////////

#include <stdexcept>
#include "wavfile.h"

using namespace std;

// Time scaling factor, values > 1.0 increase, values < 1.0 decrease tempo.
// It scales SEQUENCE_SKIP, i.e. how far the input advances per processing
// sequence, while the output always advances by SEQUENCE - OVERLAP.
#define TIME_SCALE      1   // 1 = tempo unchanged
// Processing sequence size in samples
#define SEQUENCE        800   // alternative value 4410 = 100 msec with 44100Hz samplerate
// Overlapping size in samples
#define OVERLAP         160   // alternative value 882 = 20 msec with 44100Hz samplerate
// Best overlap offset seeking window in samples
#define SEEK_WINDOW     120   // alternative value 662 = 15 msec with 44100Hz samplerate
// Processing sequence flat mid-section duration
#define FLAT_DURATION   (SEQUENCE - 2 * (OVERLAP))
// Theoretical interval between the processing sequences
#define SEQUENCE_SKIP   ((int)((SEQUENCE - OVERLAP) * (TIME_SCALE)))

typedef short SAMPLE;   // sample type, 16bit signed integer

// Use cross-correlation function to find best overlapping offset
// where input_prev and input_new match best with each other
int seek_best_overlap(const SAMPLE *input_prev, const SAMPLE *input_new)
{
   int i;
   int bestoffset = 0;
   float bestcorr = -1e30f;
   float temp[OVERLAP];

   // Precalculate overlapping slopes with input_prev
   for (i = 0; i < OVERLAP; i ++)
   {
      temp[i] = (float)(input_prev[i] * i * (OVERLAP - i));
   }

   // Find best overlap offset within [0..SEEK_WINDOW]
   for (i = 0; i < SEEK_WINDOW; i ++)
   {
      int j;
      float crosscorr = 0;

      for (j = 0; j < OVERLAP; j ++)
      {
         crosscorr += (float)input_new[i + j] * temp[j];
      }
      if (crosscorr > bestcorr)
      {
         // found new best offset candidate
         bestcorr = crosscorr;
         bestoffset = i;
      }
   }
   return bestoffset;
}


// Cross-fades from 'input_prev' to 'input_new' over OVERLAP samples: the
// weight of 'input_prev' slides from OVERLAP down to 1 while the weight of
// 'input_new' slides from 0 up to OVERLAP - 1. The result is stored to
// 'output'.
void overlap(SAMPLE *output, const SAMPLE *input_prev, const SAMPLE *input_new)
{
   for (int pos = 0; pos < OVERLAP; ++pos)
   {
      const int fading_out = input_prev[pos] * (OVERLAP - pos);
      const int fading_in  = input_new[pos] * pos;

      output[pos] = (fading_out + fading_in) / OVERLAP;
   }
}


// SOLA algorithm. Performs time scaling for sample data given in 'input',
// write result to 'output'. Return number of output samples.
int sola(SAMPLE *output, const SAMPLE *input, int num_in_samples)
{
   int num_out_samples = 0;
   const SAMPLE *seq_offset = input;
   const SAMPLE *prev_offset;

   int nTest = SEQUENCE_SKIP;
   while (num_in_samples > SEQUENCE_SKIP + SEEK_WINDOW)
   {
      // copy flat mid-sequence from current processing sequence to output
      memcpy(output, seq_offset, FLAT_DURATION * sizeof(SAMPLE));
      // calculate a pointer to overlap at end of the processing sequence
      prev_offset = seq_offset + FLAT_DURATION;

      // update input pointer to theoretical next processing sequence begin
      input += SEQUENCE_SKIP - OVERLAP;
      // seek actual best matching offset using cross-correlation
      seq_offset = input + seek_best_overlap(prev_offset, input);

      // do overlapping between previous & new sequence, copy result to output
      overlap(output + FLAT_DURATION, prev_offset, seq_offset);

      // Update input & sequence pointers by overlapping amount
      seq_offset += OVERLAP;
      input  += OVERLAP;

      // Update output pointer & sample counters
      output += SEQUENCE - OVERLAP;
      num_out_samples += SEQUENCE - OVERLAP;
      num_in_samples -= SEQUENCE_SKIP;
   }

   return num_out_samples;
}



// Buffers for input/output sample data. For sake of simplicity, these are
// just made 'big enough' for the example purpose: main() fills 'inbuffer'
// from the input file and sola() writes its result to 'outbuffer'.
// Note that sola() itself does not check the capacity of 'outbuffer'.
SAMPLE inbuffer[10240000];
SAMPLE outbuffer[20240000];

int main(int numstr, char **pstr)
{

   if (numstr < 3)
   {
      printf("usage: solatest input.wav output.wav\n");
      return -1;
   }

   try
   {
      int insamples, outsamples;

      // Open input file
      WavInFile infile(pstr[1]);

      if ((infile.getSampleRate() != 44100) || (infile.getNumChannels() != 1))
      {
         printf("Sorry, this example processes mono audio sampled at 44100Hz.\n");
         return -1;
      }

      // Read data from input file
      insamples = infile.read(inbuffer, 10240000);

      // Process
      outsamples = sola(outbuffer, inbuffer, insamples);

      // Write result to output file
      WavOutFile outfile(pstr[2], infile.getSampleRate(), infile.getNumBits(), infile.getNumChannels());
      outfile.write(outbuffer, outsamples);
   }
   catch (exception &e)
   {
      printf("Error: %s\n", e.what());
   }

   return 0;
}

SOLA.M (MATLAB)


% SOLA time scaling of a speech recording followed by a resampling step.
% NOTE(review): the original comments were unreadable (encoding damage); the
% comments below describe what the code does and mark guesses as such.
sa=585;ss=438;           % presumably analysis (sa) and synthesis (ss) shift in samples - TODO confirm
w=512;                   % frame length in samples
wov=w-ss;kmax=500;       % overlap length; number of candidate offsets searched per frame
x=wavread('v017');
%*********** time scaling **************
xst=1;yout=[];
xbuff=x(sa:sa+w-1);
st=sa:sa:length(x);         % search start positions, one every sa samples
r=mod(length(x),sa);    
num=(length(x)-r)/sa;      % number of frames to process
x=[x; zeros(w+kmax,1)];    % zero-pad so that frames shifted by up to kmax samples stay in range
for j=1:(num)
    y=x(xst:1:xst+w-1);        % current frame: w samples starting at xst
    start=st(j):st(j)+kmax-1;     % the kmax candidate start positions for this frame
    cy=y(end:-1:end-wov+1);    % last wov samples of y, in reversed order...
    cy=cy(end:-1:1);           % ...flipped back into their original order
    km_buf=zeros(1,kmax);     % similarity score of each candidate offset
    for i=1:kmax
        xbuff=x(start(i):start(i)+w-1);    % w samples starting at candidate i
        cx=xbuff(1:wov);              % first wov samples of that candidate
        rxx_k=sum(cx.^2);          
        rxy_k=sum(cx.*cy);
        if ( rxx_k==0)               % candidate has no energy (presumably the zero padding): stop searching
            km_buf(i)=0;
            break;
        else
            km_buf(i)=(rxy_k.^2)./rxx_k;
        end
    end
    km=find(km_buf==max(km_buf));   % position of the highest score in km_buf
    % append the samples following the overlap region of the best candidate
    % NOTE(review): this range holds w-wov+2 samples - confirm the '+1' upper bound is intended
    yout=[yout; x(start(km)+wov:start(km)+w+1)];
    xst=xst+sa;
end
%************ resampling ************************
L=sa;M=ss;
data=[];
data_out=[];
y_end=length(yout);
y_st=0;
for j=1:L;
    for i=1:M;
        y_st=y_st+1;
        if y_st<y_end
            invert=linspace(yout(y_st),yout(y_st+1),L+1);% linear interpolation: L-1 points inserted between two neighbouring samples
        elseif y_st==y_end
            invert=linspace(yout(y_st),0,L+1); % last sample: interpolate towards zero instead
        else
            break;
        end
        data=[data invert(1:end-1)];   % append the L interpolated points to data
    end
    data_out=[data_out data(1:M:end)];  % keep every M-th point of data and append it to data_out
    data=[];                       % clear data for the next round
end

Pitch Synchronous Overlap Add Method (PSOLA.CPP)


PSOLA.CPP

#include "../common/tdpsola.h"
#include "psola.h"

// Single file-scope CPSOLA object; every PSOLA_* function in this file forwards to it.
// NOTE(review): because all calls share this one object, concurrent use is presumably
// not safe -- confirm against the CPSOLA implementation.
CPSOLA instance;

/// Forwards the flag to CPSOLA::enableCosineSmooth on the file-scope instance.
/// @param enable   value passed through unchanged
void PSOLA_EnableCosineSmooth(bool enable)
{
    instance.enableCosineSmooth(enable);
}

/// Forwards all five arguments, in the same order, to CPSOLA::setSpectralMapping
/// on the file-scope instance.
void PSOLA_SetSpectralMapping(bool useBezier, int x1, int y1, int x2, int y2)
{
    instance.setSpectralMapping(useBezier, x1, y1, x2, y2);
}

/// Returns whatever CPSOLA::isCosineSmoothEnabled reports for the file-scope instance.
bool PSOLA_IsCosineSmoothEnabled()
{
    return instance.isCosineSmoothEnabled();
}

/// Forwards the method code to CPSOLA::setVoicelessExtension on the file-scope instance.
/// The value is passed through without any range check in this wrapper.
void PSOLA_EnableVoicelessExtension(int method)
{
    instance.setVoicelessExtension(method);
}

/// Returns whatever CPSOLA::getVoicelessExtension reports for the file-scope instance.
int PSOLA_GetVoicelessExtension()
{
    return instance.getVoicelessExtension();
}

/// Forwards to CPSOLA::modifyPitchContour on the file-scope instance and returns its result.
/// Note that the argument order differs from this function's parameter order:
/// trgWave and trgBufferLength are passed right after tagNumber, before the
/// target-period, duration and spectral-ratio arguments.
unsigned PSOLA_ModifyPitchContour(
                            const short *   srcWave,
                            unsigned        srcLength,
                            const unsigned *srcTags,
                            unsigned        tagNumber,
                            const unsigned *trgPeriods,
                            unsigned        periodNumber,
                            unsigned        trgDuration,
                            float           specRatio,
                            short *         trgWave,
                            unsigned        trgBufferLength,
                            unsigned        sampleRate)
{
    return instance.modifyPitchContour(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, trgPeriods, periodNumber, trgDuration, specRatio, sampleRate);
}

/// Forwards to CPSOLA::modify on the file-scope instance and returns its result.
/// Note that the argument order differs from this function's parameter order:
/// trgWave and trgBufferLength are passed right after tagNumber, before
/// trgPitch, trgDuration and specRatio.
unsigned PSOLA_Modify(
                const short *   srcWave,
                unsigned        srcLength,
                const unsigned *srcTags,
                unsigned        tagNumber,
                unsigned        trgPitch,
                unsigned        trgDuration,
                float           specRatio,
                short *         trgWave,
                unsigned        trgBufferLength,
                unsigned        sampleRate)
{
    return instance.modify(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, trgPitch, trgDuration, specRatio, sampleRate);
}

/// Forwards to CPSOLA::modifyRatio on the file-scope instance and returns its result.
/// Note that the argument order differs from this function's parameter order:
/// trgWave and trgBufferLength are passed right after tagNumber, before
/// pitchRatio, durationRatio and specRatio.
unsigned PSOLA_ModifyRatio(
                     const short *      srcWave,
                     unsigned           srcLength,
                     const unsigned *   srcTags,
                     unsigned           tagNumber,
                     float              pitchRatio,
                     float              durationRatio,
                     float              specRatio,
                     short *            trgWave,
                     unsigned           trgBufferLength,
                     unsigned           sampleRate
                     )
{
    return instance.modifyRatio(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, pitchRatio, durationRatio, specRatio, sampleRate);
}









PSOLA.H


///
/// Modify wave using TP-PSOLA algorithm
///
/// @version 1.0.0
/// @author Jun Xu
/// @date 2007/07/18
///
#ifndef _CST_PSOLA_PSOLA_H_
#define _CST_PSOLA_PSOLA_H_

#ifndef PSOLA_EXPORTS
#   define PSOLA_DLL_EXPORTS __declspec(dllimport)
#   ifdef _DEBUG
#       pragma comment(lib, "psolad.lib")
#       pragma message("Linking with psolad.dll")
#   else
#       pragma comment(lib, "psola.lib")
#       pragma message("Linking with psola.dll")
#   endif
#else
# define PSOLA_DLL_EXPORTS __declspec(dllexport)
#endif

#ifdef _cplusplus
extern "C" {
#endif

    #define PSOLA_VLPPMETHOD_NONE   0   ///< ÇåÒô¶Î²»×ö»ùƵÀ©Õ¹
    #define PSOLA_VLPPMETHOD_FIXED  1   ///< ÇåÒô¶Î×ö¹Ì¶¨ÖÜÆڵĻùƵÀ©Õ¹
    #define PSOLA_VLPPMETHOD_EQUAL  2   ///< ÇåÒô¶Î×öÓëµÚÒ»¸öÖÜÆÚÏàµÈµÄµÈÖÜÆÚÀ©Õ¹
    #define PSOLA_VLPPMETHOD_PEAK   3   ///< ÇåÒô¶Î¸ù¾ÝÓïÒô¼â·åµãÀ´×öÖÜÆÚÀ©Õ¹
    #define PSOLA_VLPPMETHOD_AUTO   4   ///< ×Ô¶¯×öÇåÒôÀ©Õ¹£¨¸ù¾Ýʱ³¤Ð޸ıÈÀý¾ö¶¨£©
    #define PSOLA_VLPPMETHOD_MAX    4

    ///
    /// ÉèÖÃÐ޸Ĺý³ÌÖеÄƵÆ×Ó³É䷽ʽ£¬Ð§¹û²»ºÃ£¬É÷ÓÃ
    ///
    /// @param useBezier    true:ʹÓñ´Èû¶ûÇúÏß,false:ʹÓÃÕÛÏß
    /// @param x1,y1        µÚÒ»¸ö¿ØÖƵãµÄ×ø±ê
    /// @param x2,y2        µÚ¶þ¸ö¿ØÖƵãµÄ×ø±ê
    ///
    PSOLA_DLL_EXPORTS void PSOLA_SetSpectralMapping(bool useBezier, int x1, int y1, int x2, int y2);

    ///
    /// ÉèÖÃÇåÒô¶ÎÖÜÆÚÀ©Õ¹·½Ê½
    ///
    /// @param method       0-3£¬²Î¼ûÉÏÃæµÄºê¶¨Òå
    ///
    PSOLA_DLL_EXPORTS void PSOLA_EnableVoicelessExtension(int method);

    /// »ñÈ¡ÇåÒô¶ÎÖÜÆÚÀ©Õ¹·½Ê½
    PSOLA_DLL_EXPORTS int PSOLA_GetVoicelessExtension();

    ///
    /// ÆôÓÃÓàÏÒº¯Êý½øÐÐÆ´½Ó±ß½çƽ»¬
    ///
    /// @param enable       true:ÆôÓÃ,false:²»ÆôÓÃ
    ///
    PSOLA_DLL_EXPORTS void PSOLA_EnableCosineSmooth(bool enable);

    /// ÅжÏÓàÏұ߽çƽ»¬ÊÇ·ñ±»ÆôÓÃ
    PSOLA_DLL_EXPORTS bool PSOLA_IsCosineSmoothEnabled();

    ///
    /// Modify wave using PSOLA model
    /// ʹÓÃPSOLAÄ£ÐͽøÐÐÓïÒôÐ޸ģ¬Ö¸¶¨Ä¿±êµÄƽ¾ù»ùƵÖÜÆÚÒÔ¼°ÓïÒô³¤¶È
    ///
    /// @param srcWave[in]      wave buffer read from speech database
    ///                         ÓïÒôÊý¾Ý£¬±ØÐëΪ16bit²ÉÑù¾«¶È
    /// @param srcLength[in]    wave buffer length, in short count
    ///                         ÓïÒôÊý¾ÝµÄ²ÉÑùµã¸öÊý
    /// @param srcTags[in]      peak tags read from speech database
    ///                         each tag indicate the peak position offset to the first sample of wave
    ///                         ÓïÒôÊý¾ÝµÄ·åÖµµã±ê×¢Êý×é
    ///                         ÄÚ²¿±£´æÿ¸ö·åÖµµãÏà¶ÔÓïÒôÆðʼµãµÄÆ«ÒÆλÖÃ
    /// @param tagNumber[in]    peak tag count of srcTags
    ///                         ·åÖµ±ê×¢¸öÊý
    /// @param trgPitch[in]     predicted average pitch period
    ///                         trgPitch=0 means keeping pitch no change
    ///                         Ä¿±ê»ùƵÖÜÆڵĴóС£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param trgDuration[in]  predicted wave duration, in short
    ///                         Ä¿±êÓïÒô²ÉÑùµã¸öÊý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param specRatio [in]   modification ratio of spectra
    ///                         ƵÆ×Ð޸ıÈÀý£¬0Ϊ²»ÐÞ¸Ä
    /// @param trgWave[out]     modified wave, buffer should be allocated outside
    ///                         Ä¿±êÓïÒôÊý¾Ý»º³åÇø£¬ÓÉÍⲿ·ÖÅ䣬Îñ±Ø±ÈtrgDurationÒª´óһЩ
    /// @param sampleRate[in]   Sample count per second, default is 16000
    ///                         ²ÉÑùÂÊ£¬Ò»°ãÇëʹÓÃ16000
    ///
    /// @return                 true if modified successfully
    ///                         false if not, then the content of trgWave if un-defined
    ///
    PSOLA_DLL_EXPORTS unsigned PSOLA_Modify(
        const short *   srcWave,
        unsigned        srcLength,
        const unsigned *srcTags,
        unsigned        tagNumber,
        unsigned        trgPitch,
        unsigned        trgDuration,
        float           specRatio,
        short *         trgWave,
        unsigned        trgBufferLength,
        unsigned        sampleRate);

    ///
    /// Modify wave using PSOLA model
    /// ʹÓÃPSOLAÄ£ÐͽøÐÐÓïÒôÐ޸ģ¬Ö¸¶¨Ä¿±ê»ùƵÇúÏß
    ///
    /// @param srcWave[in]      wave buffer read from speech database
    ///                         ÓïÒôÊý¾Ý£¬±ØÐëΪ16bit²ÉÑù¾«¶È
    /// @param srcLength[in]    wave buffer length, in short count
    ///                         ÓïÒôÊý¾ÝµÄ²ÉÑùµã¸öÊý
    /// @param srcTags[in]      peak tags read from speech database
    ///                         each tag indicate the peak position offset to the first sample of wave
    ///                         ÓïÒôÊý¾ÝµÄ·åÖµµã±ê×¢Êý×é
    ///                         ÄÚ²¿±£´æÿ¸ö·åÖµµãÏà¶ÔÓïÒôÆðʼµãµÄÆ«ÒÆλÖÃ
    /// @param tagNumber[in]    peak tag count of srcTags
    ///                         ·åÖµ±ê×¢¸öÊý
    /// @param trgPeriods[in]   predicted pitch period
    ///                         Ä¿±ê»ùƵÖÜÆÚÊý×é
    /// @param periodNumber[in] pitch period count of target
    ///                         Ä¿±ê»ùƵÖÜÆÚÊýÄ¿
    /// @param trgDuration[in]  predicted wave duration, in short
    ///                         Ä¿±êÓïÒô²ÉÑùµã¸öÊý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param specRatio [in]   modification ratio of spectra
    ///                         ƵÆ×Ð޸ıÈÀý£¬0Ϊ²»ÐÞ¸Ä
    /// @param trgWave[out]     modified wave, buffer should be allocated outside
    ///                         Ä¿±êÓïÒôÊý¾Ý»º³åÇø£¬ÓÉÍⲿ·ÖÅ䣬Îñ±Ø±ÈtrgDurationÒª´óһЩ
    /// @param sampleRate[in]   Sample count per second, default is 16000
    ///                         ²ÉÑùÂÊ£¬Ò»°ãÇëʹÓÃ16000
    ///
    /// @return                 true if modified successfully
    ///                         false if not, then the content of trgWave if un-defined
    ///
    PSOLA_DLL_EXPORTS unsigned PSOLA_ModifyPitchContour(
        const short *   srcWave,
        unsigned        srcLength,
        const unsigned *srcTags,
        unsigned        tagNumber,
        const unsigned *trgPeriods,
        unsigned        periodNumber,
        unsigned        trgDuration,
        float           specRatio,
        short *         trgWave,
        unsigned        trgBufferLength,
        unsigned        sampleRate);

    ///
    /// Modify wave using PSOLA model
    /// ʹÓÃPSOLAÄ£ÐͽøÐÐÓïÒôÐ޸ģ¬Ö¸¶¨ÖÜÆÚ£¬Ê±³¤µÄÐ޸ıÈÀý
    ///
    /// @param srcWave[in]      wave buffer read from speech database
    ///                         ÓïÒôÊý¾Ý£¬±ØÐëΪ16bit²ÉÑù¾«¶È
    /// @param srcLength[in]    wave buffer length, in short count
    ///                         ÓïÒôÊý¾ÝµÄ²ÉÑùµã¸öÊý
    /// @param srcTags[in]      peak tags read from speech database
    ///                         each tag indicate the peak position offset to the first sample of wave
    ///                         ÓïÒôÊý¾ÝµÄ·åÖµµã±ê×¢Êý×é
    ///                         ÄÚ²¿±£´æÿ¸ö·åÖµµãÏà¶ÔÓïÒôÆðʼµãµÄÆ«ÒÆλÖÃ
    /// @param tagNumber[in]    peak tag count of srcTags
    ///                         ·åÖµ±ê×¢¸öÊý
    /// @param pitchRatio[in]   modification ratio of pitch
    ///                         Ä¿±ê»ùƵÖÜÆÚÐ޸ıÈÀý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param durationRatio[in]modification ratio of duration
    ///                         Ä¿±êÓïÒôʱ³¤Ð޸ıÈÀý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param specRatio [in]   modification ratio of spectra
    ///                         ƵÆ×Ð޸ıÈÀý£¬0Ϊ²»ÐÞ¸Ä
    /// @param trgWave[out]     modified wave, buffer should be allocated outside
    ///                         Ä¿±êÓïÒôÊý¾Ý»º³åÇø£¬ÓÉÍⲿ·ÖÅ䣬Îñ±Ø±ÈtrgDurationÒª´óһЩ
    /// @param sampleRate[in]   Sample count per second, default is 16000
    ///                         ²ÉÑùÂÊ£¬Ò»°ãÇëʹÓÃ16000
    ///
    /// @return                 true if modified successfully
    ///                         false if not, then the content of trgWave if un-defined
    ///
    PSOLA_DLL_EXPORTS unsigned PSOLA_ModifyRatio(
        const short *   srcWave,
        unsigned        srcLength,
        const unsigned *srcTags,
        unsigned        tagNumber,
        float           pitchRatio,
        float           durationRatio,
        float           specRatio,
        short *         trgWave,
        unsigned        trgBufferLength,
        unsigned        sampleRate);

#ifdef _cplusplus
}
#endif

#endif


Example PSOLA Method

Psola.h Header


#ifndef PSOLA_H_
#define PSOLA_H_
#include <cstddef>   // NULL, used in the default argument of OverlapAdd
#include <vector>
#include "DSP.h"

// NOTE(review): a using-directive in a header leaks into every includer; it is
// kept because existing code may rely on unqualified vector/string names.
using namespace std;

///
/// Time-domain PSOLA helper that adjusts amplitude, duration and pitch of a
/// 16-bit sample buffer. Only part of the implementation is visible next to
/// this header; members whose definitions are not visible are declared
/// without further description.
///
class CPsola{
public:
    /// Creates an object with no sample buffer attached.
    CPsola();
    /// Attaches an existing sample buffer (the pointer is stored, not copied).
    CPsola(short* _Data,unsigned _DataLen);
    /// Replaces the attached sample buffer and discards the current pitch marks.
    void SetData(short* data,unsigned length);
    /// Sets the amplitude multiplier; negative values are ignored.
    void SetAmplitudeMultiple(float temp);
    /// Sets the target duration; negative values are ignored.
    void SetDuration(float temp);
    /// Stores the per-frame pitch array (and X1 when non-negative), then marks pitch positions.
    void SetPitch(float* pitch,unsigned size,float X1);
    /// Stores the new pitch array; ignored unless newsize equals the current pitch count.
    void SetNewPitch(float* newpitch,unsigned newsize);
    /// Sets the sample frequency.
    void SetSampleFrequency(unsigned temp);
    /// Sets the frame length; negative values are ignored.
    void SetFrameLength(float temp);
    /// Sets the length of the first frame; negative values are ignored.
    void SetX1(float temp);
    /// Applies the amplitude adjustment followed by the duration adjustment.
    void Adjust();

    void TD_PSOLA(float,float);
    void PSOLA(float,float,bool);

    unsigned GetNewLen();
    short* GetNewData();
    ~CPsola();
private:
    /// Index of the largest sample in data[begin..end].
    unsigned FindMax(unsigned begin,unsigned end,short* data);
    /// Rounds a float to the nearest int.
    int Approximate(float temp);
    short Middle(unsigned mid,short* data);
    /// Builds the pitch-mark array `flag`; returns false when the pitch array is too long for the data.
    bool MarkPitch();
    /// Marks pitch positions inside frame j, starting from the peak at Pmax.
    void MarkOneFrame(unsigned j,unsigned Pmax);
    void AdjustAmplitude();
    void AdjustDuration();
    void AdjustPitch();
    void Smooth(short*,unsigned);
private:
    unsigned m_uSamFre;     ///< sample frequency (samples per second)
    float m_dFrameLen;      ///< frame length; multiplied by m_uSamFre to obtain sample counts
    float m_dX1;            ///< length of the first frame; multiplied by m_uSamFre to obtain sample counts
    float m_dAmpMul;        ///< amplitude multiplier; -1 means "do not adjust"
    float m_dDuration;      ///< target duration; -1 means "do not adjust"
    unsigned m_uPitchLen;   ///< number of entries in m_dPitch / m_dNewPitch
    float* m_dPitch;        ///< per-frame pitch values supplied through SetPitch (not copied)
    float* m_dNewPitch;     ///< per-frame target pitch values supplied through SetNewPitch (not copied)
    unsigned m_uDataLen;    ///< number of samples in m_Data
    // unsigned m_uNewPitchLen;
    short* m_Data;          ///< current working samples; equals m_InData until an adjustment allocates a new buffer
    bool* flag;             ///< one entry per sample; true at marked pitch positions
    short* m_InData;        ///< buffer handed in by the caller
    CDSP m_filter;

    void GetPitchMarks(vector<unsigned>&);
    bool IsVowel(unsigned);
    int GetAvgPitchLen(vector<unsigned>&,int&);
    void GetFinal(vector<unsigned>&,vector<unsigned>&,
        int,vector<int>&,vector<vector<unsigned> >&);
    void GetUseds(int,int,int,vector<int>&);
    void smooth(short*,unsigned,vector<float>&);
    void OverlapAdd(vector<vector<unsigned> >& final, short* y, unsigned ylen,
        vector<float>& w, float* pBeta = NULL);
public:
    void PSOLA(float,float*,int,float);

};

#endif

DSP.h Digital Signal Processing C++ Header


#pragma once
#include <vector>
#include <cmath>
#include <complex>
#include <iostream>
#include <algorithm>
using namespace std;

// Pi truncated to seven decimal places.
// NOTE(review): object-like macros in a header are visible to every includer and
// may clash with other definitions of PI -- confirm before reusing this header.
#define PI 3.1415926
// Large float constant (1.0e37). Its use is not visible in this section.
#define FLOAT_MAX (float)1.0e37


// Collection of signal-processing routines (FFT, autocorrelation, conversions
// between LPC / reflection coefficients / cepstrum / LSF, windowing, spectra).
// Only the declarations are visible here; the notes below are inferred from the
// names and signatures and should be confirmed against the implementation.
class CDSP
{
public:
CDSP(void);
~CDSP(void);
// Transform of the sample vector s into spec; `invert` defaults to false
// (presumably selects the inverse transform -- confirm).
void FFT(const vector<short>& s, vector<complex<float> >& spec, bool invert=false);
// void FFT (float *x, float *y, int n, int m);
// Presumably fills r with autocorrelation values of s up to order p -- confirm.
float AutoCorrelate(const vector<short>& s, vector<float>& r, int p);
// Conversions between parameter sets; inputs are const references, outputs are
// written through the non-const references / pointers.
void Wave2LPC(const vector<short>& s, vector<float>& a, vector<float>& k, int p, float *re, float *te);
void LPC2RefC(const vector<float>& a, vector<float>& k);
void RefC2LPC(const vector<float>& k, vector<float>& a);
void LPC2Cepstrum(const vector<float>& a, vector<float>& c);
void Cepstrum2LPC(const vector<float>& c, vector<float>& a);
void SpecModulus(const vector<complex<float> >& spec, vector<float>& m);
void LSF2LPC(const vector<float>& lsf, vector<float>& a_coef);
void LPC2LSF(const vector<float>& a, vector<float>& lsf);
// NOTE(review): `string` is used here but <string> is not among this header's
// includes; it presumably arrives through <iostream> -- confirm.
void window(vector<float>& wgt, int len, string type);
void LPCSpec(const vector<float>& a, int len, float G, vector<complex<float> >& spec);
float GetG(const vector<short>& s, const vector<float>& a);
void ExcitationSpec(const vector<complex<float> >& WavSpec, const vector<complex<float> >& LpcSpec, vector<complex<float> >& ExcSpec);
private:
float Durbin(vector<float>& k, vector<float>& a, vector<float>& r, float E, int p);
// void cheby(vector<float>& g, int order);
// void cacm283(vector<float>& a,int order,vector<float>& r);
// Exchanges a and b through a temporary copy.
template <typename T> inline void swap(T& a,T& b) { T temp=a; a = b; b = temp; }
};

PSOLA.CPP TD-Psola TTS C++ Code


///////////////////////////////////////////////////////////////////////
// CopyRight     : Copyright (c) 2004, IRLab, All rights reserved
// File Name     : Psola.cpp
// File Summary  : This file provide the modules to adjust the wav
// with TD-Psola
// Author        : elevens
// Create Time   : 2004/8/25
// Project Name  : TTS
// Version       : 1.0
// Histroy       :
////////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include <fstream>
#include <iostream>
#include <assert.h>
#include <algorithm>
#include <string>
#include <math.h>
#include <vector>
#include "Psola.h"
#include ".\psola.h"
//#include ".\psola.h"

// Switches on the debug-only output stream defined below.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; consider a project-specific name.
#define __DEBUG

using namespace std;

#ifdef __DEBUG
// Debug stream bound to the relative path "out.txt". It is a namespace-scope
// object, so the file is opened during static initialization of this
// translation unit. The only visible use is commented out in MarkPitch.
ofstream out("out.txt");
#endif
///////////////////////////////////////////////////////////////////////
// Function Name : CPsola
// Function func : default constructor; creates an object with no sample
//                 buffer attached and default parameters
//                 (16000 Hz, frame length 0.02, first-frame length 0.02,
//                 amplitude and duration adjustment disabled via -1)
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        :
////////////////////////////////////////////////////////////////////////
CPsola::CPsola()
:m_dPitch(NULL),m_dNewPitch(NULL),m_uDataLen(0),m_Data(NULL),flag(NULL),m_InData(NULL)
{
    // m_uDataLen was previously left uninitialized; MarkPitch and
    // AdjustAmplitude read it, so an object used before SetData worked on an
    // indeterminate length. The initializers follow the member declaration order.
    m_uSamFre = 16000;
    m_dFrameLen = 0.02;
    m_dX1 = 0.02;
    m_dAmpMul = -1;     // -1: AdjustAmplitude does nothing
    m_dDuration = -1;   // -1: AdjustDuration only copies the data
    m_uPitchLen = 0;

}

///////////////////////////////////////////////////////////////////////
// Function Name : CPsola
// Function func : constructor that attaches an existing sample buffer and
//                 sets every other member to its default value
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : _Data: wav data (the pointer is stored, not copied)
//                 _DataLen: the length of the data
// output        :
////////////////////////////////////////////////////////////////////////
CPsola::CPsola(short* _Data,unsigned _DataLen)
    : m_uSamFre(16000)
    , m_dFrameLen(0.02)
    , m_dX1(0.02)
    , m_dAmpMul(-1)
    , m_dDuration(-1)
    , m_uPitchLen(0)
    , m_dPitch(NULL)
    , m_dNewPitch(NULL)
    , m_uDataLen(_DataLen)
    , m_Data(_Data)
    , flag(NULL)
    , m_InData(_Data)
{
    // Everything is set in the initializer list; nothing left to do here.
}


///////////////////////////////////////////////////////////////////////
// Function Name : FindMax
// Function func : find the max value of data between begin and end
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : begin: the position to begin with (inclusive)
//                 end: the end position (inclusive)
//                 data: the array of wav data
// output        : the position of the first occurrence of the max value;
//                 begin when the range is empty (begin > end)
////////////////////////////////////////////////////////////////////////
unsigned CPsola::FindMax(unsigned begin,unsigned end,short* data)
{
    // Default to the first index. The previous version left the result
    // uninitialized when the range was empty or every sample was -32768.
    unsigned position = begin;
    if(begin > end)
        return position;

    short best = data[begin];
    // Advance before testing so the loop also terminates when end is the
    // largest unsigned value (a "k <= end" condition could never become false).
    unsigned k = begin;
    while(k < end)
    {
        ++k;
        if(data[k] > best)   // strict comparison keeps the first maximum
        {
            best = data[k];
            position = k;
        }
    }

    return position;
}


///////////////////////////////////////////////////////////////////////
// Function Name : Approximate
// Function func : convert a float to the nearest int; a fractional part
//                 of exactly one half moves away from zero
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : temp: the float value
// output        : the int value
////////////////////////////////////////////////////////////////////////
int CPsola::Approximate(float temp)
{
    const int whole = static_cast<int>(temp);   // truncates toward zero
    const float fraction = temp - whole;

    const bool closerToWhole = (fraction > -0.5) && (fraction < 0.5);
    if(closerToWhole)
    {
        return whole;
    }
    // At least half a unit away from the truncated value: step outwards.
    return (fraction < 0) ? (whole - 1) : (whole + 1);
}

///////////////////////////////////////////////////////////////////////
// Function Name : Middle
// Function func : return the third-largest of the four samples
//                 data[mid+1] .. data[mid+4]
//                 NOTE(review): the original description said "middle value
//                 of five numbers", but only four samples are examined and
//                 data[mid] itself is not among them -- confirm the intent.
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : mid: the position just before the first examined sample
//                 data: the wav data
// output        : the third-largest of the four examined values
////////////////////////////////////////////////////////////////////////
short CPsola::Middle(unsigned mid,short* data)
{
    short window[4];
    for(int offset = 0; offset < 4; ++offset)
    {
        window[offset] = data[mid + 1 + offset];
    }

    // Ascending order: of four values the third largest sits at index 1.
    std::sort(window, window + 4);
    return window[1];
}

///////////////////////////////////////////////////////////////////////
// Function Name : SetData
// Function func : attach a new sample buffer; the pitch-mark array and any
//                 working buffer that differs from the previous input
//                 buffer are released first
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : data: a pointer to the data (stored, not copied)
//                 length: the length of the data set
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetData(short* data,unsigned length)
{
    // delete[] on a null pointer is a no-op, so no explicit check is needed.
    delete [] flag;
    flag = NULL;

    // The working buffer is released only when it is not the input buffer.
    const bool hasSeparateWorkingBuffer = (m_Data != NULL) && (m_Data != m_InData);
    if(hasSeparateWorkingBuffer)
    {
        delete [] m_Data;
    }

    m_InData = data;
    m_Data = data;
    m_uDataLen = length;
}

///////////////////////////////////////////////////////////////////////
// Function Name : SetAmplitudeMultiple
// Function func : set the amplitude multiplier; values that are not
//                 greater than or equal to zero are ignored
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : temp: the multiplier to be set
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetAmplitudeMultiple(float temp)
{
    const bool acceptable = (temp >= 0);
    if(!acceptable)
    {
        return;   // keep the current multiplier
    }
    m_dAmpMul = temp;
}


///////////////////////////////////////////////////////////////////////
// Function Name : SetDuration
// Function func : Set the Duration to adjust to
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : temp: the Duration to be set
// output        :
////////////////////////////////////////////////////////////////////////
 void CPsola::SetDuration(float temp)
{
if(temp >= 0)
m_dDuration = temp;
}


///////////////////////////////////////////////////////////////////////
// Function Name : SetFrameLength
// Function func : set the length of one frame; values that are not
//                 greater than or equal to zero are ignored
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : temp: the length of frame to be set
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetFrameLength(float temp)
{
    const bool acceptable = (temp >= 0);
    if(!acceptable)
    {
        return;   // keep the current frame length
    }
    m_dFrameLen = temp;
}


///////////////////////////////////////////////////////////////////////
// Function Name : SetSampleFrequency
// Function func : store the sample frequency; the value is accepted
//                 without any validation
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : temp: the sample frequency to be set
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetSampleFrequency(unsigned temp)
{
m_uSamFre = temp;
}


///////////////////////////////////////////////////////////////////////
// Function Name : SetX1
// Function func : set the length of the first frame; values that are not
//                 greater than or equal to zero are ignored
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : temp: the length of the first frame
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetX1(float temp)
{
    const bool acceptable = (temp >= 0);
    if(!acceptable)
    {
        return;   // keep the current first-frame length
    }
    m_dX1 = temp;
}


///////////////////////////////////////////////////////////////////////
// Function Name : SetPitch
// Function func : store the pitch array of the wav and mark the pitch
//                 positions on the current data
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : pitch: the array of the pitches (stored, not copied)
//                 size: the length of the pitches
//                 X1: the length of the first frame; ignored unless >= 0
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetPitch(float* pitch,unsigned size,float X1)
{
    m_uPitchLen = size;
    m_dPitch = pitch;

    const bool firstFrameGiven = (X1 >= 0);
    if(firstFrameGiven)
    {
        m_dX1 = X1;
    }

    // The success flag of MarkPitch is not reported to the caller.
    static_cast<void>(MarkPitch());
}


///////////////////////////////////////////////////////////////////////
// Function Name : SetNewPitch
// Function func : store the target pitch array; the call is ignored
//                 unless its length equals the current pitch count
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : newpitch: the array of the new pitches (stored, not copied)
//                 newsize: the length of the new pitches
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::SetNewPitch(float* newpitch,unsigned newsize)
{
    const bool lengthsAgree = (newsize == m_uPitchLen);
    if(lengthsAgree)
    {
        m_dNewPitch = newpitch;
    }
}



///////////////////////////////////////////////////////////////////////
// Function Name : MarkPitch
// Function func : rebuild the pitch-mark array `flag` (one entry per
//                 sample) from the per-frame pitch values
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        : false if no data / pitch array is attached or if the
//                 pitch array covers more time than the data; true otherwise
////////////////////////////////////////////////////////////////////////
bool CPsola::MarkPitch()
{
    // Both arrays are dereferenced below; refuse to run without them instead
    // of reading through a null pointer.
    if(m_Data == NULL || m_dPitch == NULL)
    {
        cout<<"error in markpitch"<<endl;
        return false;
    }
    // The frames described by the pitch array must fit inside the data.
    // (With fewer than two pitch entries the unsigned subtraction wraps to a
    // huge value, so this check also rejects those cases.)
    if( (m_dX1+(m_uPitchLen-2)*m_dFrameLen)*m_uSamFre >= m_uDataLen)
    {
        cout<<"error in markpitch"<<endl;
        return false;
    }
    if(flag!=NULL)
    {
        delete [] flag;
        flag = NULL;
    }
    // Value-initialization zeroes every element, replacing the former memset.
    flag = new bool[m_uDataLen]();
    flag[0] = 1;
    unsigned first = static_cast<unsigned>(m_dX1*m_uSamFre);   // first frame length in samples
    unsigned Pmax =0;
    unsigned begin = 0;
    unsigned end = 0;
    for(unsigned i=0 ;i<m_uPitchLen ;++i)
    {
        // Frame i starts at sample 0 for the first frame, otherwise after
        // the first frame plus (i-1) regular frames.
        if(i == 0)
            begin = 0;
        else
            begin = static_cast<unsigned>((i-1)*m_dFrameLen*m_uSamFre) + first;
        if(m_dPitch[i] > 0)
        {
            // Frame with a positive pitch: anchor on the largest sample of
            // the frame, then mark the remaining periods around it.
            end = static_cast<unsigned>(i*m_dFrameLen*m_uSamFre) + first;
            if(i == m_uPitchLen-1)
                end = m_uDataLen - 1;   // the last frame extends to the end of the data
            Pmax = FindMax(begin,end,m_Data);
            flag[Pmax] = 1;
            MarkOneFrame(i,Pmax);
        }
        else
        {
            // Frame without a positive pitch: mark its midpoint and its end,
            // as long as they lie inside the data.
            Pmax = begin + static_cast<unsigned>(m_dFrameLen*m_uSamFre/2);
            if(Pmax < m_uDataLen-1)
                flag[Pmax] = 1;
            Pmax += static_cast<unsigned>(m_dFrameLen*m_uSamFre/2);
            if(Pmax < m_uDataLen-1)
                flag[Pmax] = 1;
        }
    }

    // for(int i=0 ;i<m_uDataLen ;++i)
    // if(flag[i])
    // out<<i<<endl;
    return true;

}


///////////////////////////////////////////////////////////////////////
// Function Name : MarkOneFrame
// Function func : mark pitch positions inside one frame, walking one
//                 pitch period at a time away from the frame's peak, first
//                 towards the end of the frame and then towards its start
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : j: the jth Frame
//                 Pmax: the position of the max value
// output        :
////////////////////////////////////////////////////////////////////////

void CPsola::MarkOneFrame(unsigned j,unsigned Pmax)
{
    unsigned first = static_cast<unsigned>(m_dX1*m_uSamFre);
    // A pitch of zero must be rejected as well: it would make the period
    // 1/pitch infinite and the index conversions below undefined.
    // (The previous guard only rejected negative values.)
    if(m_dPitch[j] <= 0)
        return;
    float T = 1/m_dPitch[j];   // pitch period; T*m_uSamFre is the period in samples
    int p,p1,p2,i;
    p2 = p1 = Pmax;
    p = 0;
    int temp;

    // Last position from which a further full period still fits in the frame
    // (or in the data, for the last frame).
    int end = (j == (m_uPitchLen-1) )?static_cast<int>(m_uDataLen-1-T*m_uSamFre):static_cast<int>(j*m_dFrameLen*m_uSamFre+first-T*m_uSamFre);

    // Walk towards the end of the frame: the next mark is the largest sample
    // within +/-5 samples of one period after the current mark.
    while(p1 <= (end - 5) )
    {
        p = FindMax(static_cast<unsigned>(p1+T*m_uSamFre-5),static_cast<unsigned>(p1+T*m_uSamFre+5),m_Data);
        flag[p] = 1;
        p1 = p;
    }
    // One extra mark when the last one landed close to the limit and another
    // search window still fits inside the data.
    if (p!=0 && p-end<=5 && p+T*m_uSamFre+5<m_uDataLen )
    {
        p = FindMax(static_cast<unsigned>(p+T*m_uSamFre-5),static_cast<unsigned>(p+T*m_uSamFre+5),m_Data);
        flag[p] = 1;
    }

    // Mark from the maximum peak back towards the start of the frame.
    unsigned begin = (j == 0)?static_cast<unsigned>(T*m_uSamFre):static_cast<unsigned>((j-1)*m_dFrameLen*m_uSamFre+first+T*m_uSamFre);
    while(p2 >= (begin + 5) )
    {
        p = FindMax(static_cast<unsigned>(p2-T*m_uSamFre-5),static_cast<unsigned>(p2-T*m_uSamFre+5),m_Data);
        flag[p] = 1;
        p2 = p;
    }
    if (p!=0 && begin-p<=5 && p-T*m_uSamFre-5>0 )
    {
        p = FindMax(static_cast<unsigned>(p-T*m_uSamFre-5),static_cast<unsigned>(p-T*m_uSamFre+5),m_Data);
        flag[p] = 1;
        // If another mark follows within 20 samples, keep only the one with
        // the larger sample value.
        temp = p+20 >= m_uDataLen ? m_uDataLen-1 : p+20;
        for( i = p+1 ; i <= temp ; i++ )
        {
            if(flag[i]==1)
            {
                if(m_Data[i]>m_Data[p]) flag[p] = 0;
                else flag[i] = 0;
                break;
            }
        }
    }
}


///////////////////////////////////////////////////////////////////////
// Function Name : Adjust
// Function func : adjust the wav: amplitude first, then duration.
//                 The pitch adjustment call is commented out.
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::Adjust()
{
AdjustAmplitude();
AdjustDuration();
// AdjustPitch();
}


///////////////////////////////////////////////////////////////////////
// Function Name : AdjustAmplitude
// Function func : multiply every sample of the current data, in place, by
//                 the amplitude multiplier; results are saturated to the
//                 16-bit sample range
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustAmplitude()
{
    if(m_dAmpMul == -1)     // -1 means no multiplier has been set
        return;
    if(m_Data == NULL)      // nothing attached, nothing to scale
        return;

    for(unsigned i=0 ;i<m_uDataLen ;++i)
    {
        float scaled = m_dAmpMul*m_Data[i];
        // Converting an out-of-range float to short is undefined behaviour,
        // so clamp to the representable range before the conversion.
        if(scaled > 32767.0f)
            scaled = 32767.0f;
        else if(scaled < -32768.0f)
            scaled = -32768.0f;
        m_Data[i] = static_cast<short>(scaled);
    }
}


///////////////////////////////////////////////////////////////////////
// Function Name : AdjustDuration
// Function func : Adjust the Duration of the wav
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustDuration()
{
if(m_dDuration == -1)
{
short* NewData = new short[m_uDataLen];
memcpy(NewData,m_Data,m_uDataLen*2);
m_Data = NewData;
return;
}

if(flag == NULL)
return;

float sum = 0;
int nframe = 0;

int nFirstVowel = 0;

for(unsigned i=0 ;i<m_uPitchLen-1 ;++i)
// for(unsigned i=0 ;i<m_uPitchLen ;++i)
{
if(m_dPitch[i] > 0)
{
if( nFirstVowel == 0 )
{
nFirstVowel = i;
continue;
}
sum += m_dPitch[i];
++nframe;
}
}

if(nframe == 0)
return;

short* NewData;
unsigned NewLen = static_cast<unsigned>(m_dDuration*m_uSamFre);
if(NewLen < m_uDataLen)
NewData = new short[m_uDataLen];
else
NewData = new short[NewLen];
memset(NewData,0,NewLen*2);

float avg = sum/nframe; //¸ÃÒô½Ú»ùƵµÄƽ¾ùÖµ
//Ò»¹²ÐèÒªÔö¼Ó»ò¼õÉÙµÄÖÜÆÚ¸öÊý
int p_add = Approximate((static_cast<int>(NewLen) - static_cast<int>(m_uDataLen))*avg/m_uSamFre);
//ÿ¸öÖÜÆÚ¸´ÖƼ¸´Î
int every1 = static_cast<int>(static_cast<float>(p_add)/(sum*m_dFrameLen));
int every2 = every1;
//ÿFrame¼õ¼¸¸öÖÜÆÚ
int every3 = -(p_add/nframe);

int every4 = every3;
unsigned end = 0;
int oldpos = 0;
int n = 1;
unsigned t=0;
int pos = 0; //ÓàÊý
if(p_add > 0)
{
pos = Approximate(p_add - every1*sum*m_dFrameLen);
p_add = p_add + every1 + 1;
}
else
{
pos = p_add + every3*nframe;
p_add = p_add - every3;
}

unsigned uLastFrameLen = static_cast<unsigned>(m_uDataLen - m_dX1*m_uSamFre)%static_cast<unsigned>(0.02*m_uSamFre);

//Ìø¹ýµÚÒ»¸öÔªÒôFrame
unsigned skip = m_dX1*m_uSamFre + nFirstVowel*m_dFrameLen*m_uSamFre;
unsigned k = 0 ;
unsigned pos_start;

if(pos>=0)
pos_start = m_uDataLen - uLastFrameLen - (m_uSamFre/avg)*pos;
else
pos_start = m_uDataLen - uLastFrameLen + (m_uSamFre*m_dFrameLen)*pos;

for(unsigned i=0 ;i<m_uDataLen-uLastFrameLen/*Ìø¹ý×îºóÒ»¸öFrame*/ ;++i)
{
if(flag[i] != 0)
{
if( i < skip || ((i-t) >= 150) || ((i-t) <= 10) )
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end+i-t;
t = i;
}
}  
else //×ÇÒô
{
every1 = every2;
if(p_add > 0) //ÑÓ³¤
{
while(every1 >= 0)
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end + i - t;
}
--every1;
}
if(pos != 0 && pos_start-i <= m_uSamFre/avg) //ÓàÊý´ÓºóÃæ¼Ó
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
--pos;
end = end + i - t;
}
--p_add;
}
p_add = p_add - every2;
t=i;
}


//Ëõ¶Ì
else if(p_add < 0)
{
oldpos = Approximate(m_dFrameLen*avg);
if(every3 <= 0)
{
if(end < NewLen-i+t)
{
memcpy(NewData+end ,m_Data+t ,(i-t)*2);
end = end + i - t;
}
++n;
}
else
{
t = i;
--every3;
++p_add;
++n;
}
//¿ØÖÆ´Óÿ֡ÖÐɾ³ý
if(n == oldpos)
{
//´¦Àíp_addµÄÓàÊý²¿·Ö
if(pos <= 0 && i>=pos_start)
{
end = end - (i - t);
++pos;
++p_add;
}
every3 = every4;
n = 1;
}
t = i;
}

}

}
}
//×îºóÒ»¸öFrame

if( end+m_uDataLen-t < NewLen )
{
memcpy(NewData+end,m_Data+t,(m_uDataLen-t)*2);
end += m_uDataLen-t;
}
else if(end < NewLen)
{
memcpy(NewData+end,m_Data+t,(NewLen-end)*2);
end = NewLen;
}

m_Data = NewData;
m_uDataLen = end ;
}


///////////////////////////////////////////////////////////////////////
// Function Name : AdjustPitch
// Function func : Adjust the Pitches of the wav
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::AdjustPitch()
{
if(m_dNewPitch == NULL)
return;
if(flag == NULL)
return;

unsigned NewLen = 0;
unsigned i;
for( i=1 ;i<m_uPitchLen ;++i)
// NewLen += static_cast<unsigned>(((m_dNewPitch[i]/m_dPitch[i]) + 1)*m_dFrameLen*m_uSamFre);
NewLen += static_cast<unsigned>((static_cast<float>(m_dNewPitch[i])/static_cast<float>(m_dPitch[i]) + 1)*m_dFrameLen*m_uSamFre);
unsigned x1 = static_cast<unsigned>(m_dX1*m_uSamFre);
NewLen += x1;
short* NewData = new short[NewLen];

bool* NewFlag = new bool[NewLen];
memset(NewFlag,0,NewLen*sizeof(bool));

unsigned end = 0;
memcpy(NewData,m_Data,x1*sizeof(short));
end += x1;

unsigned oldpos = 0;
unsigned pos = 0;
int t = 0;
int pit = 0;
unsigned win_length = 0;
unsigned spos = 0;
unsigned epos = 0;

short* FrontWin = new short[Approximate((2.0/50)*m_uSamFre) + 1];
short* BehindWin = new short[Approximate((2.0/50)*m_uSamFre) + 1];
float hanning = 0;
vector<unsigned> vecPos;
for( i=1 ;i<m_uPitchLen ;++i)
{
if(m_dNewPitch[i] == 0)
continue;
oldpos = static_cast<unsigned>((m_dX1 + (i - 1)*m_dFrameLen)*m_uSamFre);
pos = static_cast<unsigned>((m_dX1 + i*m_dFrameLen)*m_uSamFre);
if(i == m_uPitchLen)
pos = m_uDataLen;
//»ùƵûÓиıäµÄ»òÊÇÔ­À´ÊÇÇåÒôµÄ
if((m_dNewPitch[i] == m_dPitch[i]) || (m_dPitch[i] == 0) )
{
memcpy(NewData+end ,m_Data+oldpos ,pos-oldpos);
end = end + pos - oldpos;
continue;
}

vecPos.clear();
for(unsigned k = oldpos ;k<pos ;++k)
if(!flag[k])
vecPos.push_back(k);
memcpy(NewData+end ,m_Data+oldpos ,vecPos[0]-oldpos);
end = end + vecPos[0] - oldpos;
NewFlag[end] = 1;

t = Approximate((1/m_dPitch[i])*m_uSamFre);
win_length = 2*t + 1;
pit = Approximate((1/m_dNewPitch[i] - 1/m_dPitch[i])*m_uSamFre);

spos = vecPos[0] - t;
epos = vecPos[0] + t;
if(vecPos[0] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}

if(pit > 0)
{
for(size_t k=1 ;k<vecPos.size()-1 ;++k)
{
short* temp = FrontWin;
FrontWin = BehindWin;
BehindWin = FrontWin;

spos = vecPos[k] - t;
epos = vecPos[k] + t;
if(vecPos[k] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}
// if(end > 1) //²»Ì«¿ÉÄÜend<2
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*(FrontWin[t+pit] + BehindWin[0]) + 0.15*(FrontWin[t+pit+1] + BehindWin[1]);
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*(FrontWin[t+pit] + BehindWin[0]) + 0.15*(FrontWin[t+pit+1] + BehindWin[1]);
++end;
//Ç°°ë²¿·ÖµÄµþ¼Ó
//ÕâÀïÔÚµÚÒ»Ö¡ºÍ×îºóÒ»Ö¡»¹ÓÐÒ»¶¨µÄÎÊÌâ
for( i=1 ;i<=t-pit ;++i)
{
NewData[end] = FrontWin[t+pit+i] + BehindWin[i];
++end;
}
//ÖÐÐIJ¿·Ö²»ÐèÒªµþ¼Ó
//¶ÔÁ½²¿·ÖµÄÏàÁÚµã×÷ƽ»¬
NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*BehindWin[t-pit+1] + 0.15*BehindWin[t-pit+2];
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*BehindWin[t-pit+1] + 0.15*BehindWin[t-pit+2];
++end;
//ºó°ë²¿·Ö
for( i=t-pit+2 ;i<t+pit ;++i)
{
NewData[end] = BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
//´¦Àí×îºóÒ»¸ö´°µÄÊý¾Ý
for( i=1 ;i<t-pit-1 ;++i)
{
NewData[end] = FrontWin[t+pit+i] + BehindWin[i];
++end;
}
NewFlag[end] = 1;
}
else
{
pit = -pit;
for(size_t k=1 ;k<vecPos.size() ;++k)
{
short* temp = FrontWin;
FrontWin = BehindWin;
BehindWin = FrontWin;

spos = vecPos[k] - t;
epos = vecPos[k] + t;
if(vecPos[k] < t)
spos = 0;
if(epos > (m_uDataLen - 1) )
epos = m_uDataLen - 1;
for( i=spos ;i<epos+1 ;++i)
{
hanning = 0.5 - 0.5*cos(2*(i-spos)*3.1415926/(win_length-1));
BehindWin[i-spos] = static_cast<short>(m_Data[i]*hanning);
}


NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*(FrontWin[t] + BehindWin[pit]) + 0.15*(FrontWin[t+1] + BehindWin[pit+1]);
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*(FrontWin[t] + BehindWin[pit]) + 0.15*(FrontWin[t+1] + BehindWin[pit+1]);
++end;
for( i=1 ;i<t-pit-1 ;++i)
{
NewData[end] = FrontWin[t-pit+i] + BehindWin[i];
++end;
}
NewFlag[end] = 1;
}


NewData[end-1] = 0.15*NewData[end-2] + 0.4*NewData[end-1] + 0.3*m_Data[vecPos.back()] + 0.15*m_Data[vecPos.back()+1];
NewData[end] = 0.15*NewData[end-2] + 0.3*NewData[end-1] + 0.4*m_Data[vecPos.back()] + 0.15*m_Data[vecPos.back()+1];
++end;
for( i=vecPos.back() ;i<pos ;++i)
{
NewData[end] = m_Data[i];
++end;
}
}
}

Smooth(NewData,end);
delete [] FrontWin;
delete [] BehindWin;
delete [] flag;
flag = NewFlag;
m_Data = NewData;
m_uDataLen = end;


}


///////////////////////////////////////////////////////////////////////
// Function Name : Smooth
// Function func : Smooth the new data after pitch adjusting
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         : data   - pointer to the samples to smooth (modified in place)
//                 length - number of samples in data
// output        : none; the smoothed samples are written back into data
////////////////////////////////////////////////////////////////////////
void CPsola::Smooth(short* data,unsigned length)
{
    // Both passes use a 5-sample neighbourhood, so buffers shorter than five
    // samples have no interior sample to rewrite. Returning here also avoids
    // the unsigned underflow of `length-2` for length < 2.
    if (data == NULL || length < 5)
        return;

    // Work on a copy of the input. The median pass only produces samples
    // 2 .. length-3, but the linear pass reads temp[i-2] and temp[i-1] at
    // i == 2; seeding temp from data gives those (and the two trailing
    // samples) defined values instead of uninitialized heap contents.
    // The vector also releases the scratch buffer if Middle() throws.
    std::vector<short> temp(data, data + length);

    // Median smoothing. Middle() is defined elsewhere in this file;
    // presumably it returns the median of the window starting at i-2 - confirm.
    for (unsigned i = 2; i < length - 2; ++i)
        temp[i] = Middle(i - 2, data);

    // Linear smoothing: symmetric 5-tap weighted average of the median output.
    // The first two and last two samples of data are left as they were.
    for (unsigned i = 2; i < length - 2; ++i)
    {
        data[i] = static_cast<short>(0.15*temp[i-2] + 0.2*temp[i-1] + 0.3*temp[i]
                                     + 0.2*temp[i+1] + 0.15*temp[i+2]);
    }
}


///////////////////////////////////////////////////////////////////////
// Function Name : GetNewLen
// Function func : get the new length of the new data
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        : the length of the new data
////////////////////////////////////////////////////////////////////////
unsigned CPsola::GetNewLen()
{
    // Number of samples in the buffer that GetNewData() currently exposes.
    const unsigned currentLength = m_uDataLen;
    return currentLength;
}


///////////////////////////////////////////////////////////////////////
// Function Name : GetNewData
// Function func : get the pointer to the new data
// Author        : elevens
// Create Time   : 2004/8/16
// Class Name    : CPsola
// input         :
// output        : the pointer to the new data
////////////////////////////////////////////////////////////////////////
short* CPsola::GetNewData()
{
    // Returns the internal sample buffer itself; no copy is made, and the
    // destructor may later delete[] this same pointer.
    short* const samples = m_Data;
    return samples;
}



CPsola::~CPsola()
{
    // delete[] on a null pointer is a no-op, so no explicit NULL test is needed.
    delete [] flag;

    // m_Data is released only when it is a different buffer from m_InData;
    // when the two are the same pointer it is left alone.
    const bool dataIsSeparateBuffer = (m_Data != m_InData);
    if (dataIsSeparateBuffer)
        delete [] m_Data;
}

///////////////////////////////////////////////////////////////////////
// Function Name : TD_PSOLA
// Function func :
// Author        : Taliux
// Create Time   : 2004/12/1
// Class Name    : CPsola
// input         : float tscale,float pscale
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::TD_PSOLA(float tscale,float pscale)
{

if(tscale==1 && pscale==1)
return ;
// MarkPitch();
vector<unsigned> vPM,vNewPM;
GetPitchMarks(vPM);
vNewPM=vPM;
int pos;
unsigned i,T0,pshift=0;
if(pscale!=1)
{
for(i=1;i<vPM.size();i++)
{
T0=vPM.at(i)-vPM.at(i-1);
if (IsVowel(vPM.at(i-1)))
{
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));

}
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}

}
float new_tscale=tscale*(double)vPM.back()/(double)vNewPM.back(); //Éú³ÉеĻùÒô±ê×¢ÐòÁкó¶Ôʱ³¤Ð޸IJÎÊýµÄµ÷Õû

// vector<unsigned>::iterator  iVowelBegin;
int avg=GetAvgPitchLen(vNewPM,pos);
if(avg==0)
return;
int tot = (vNewPM.back()*new_tscale - vNewPM.at(pos)) / avg;
if(tot<=0)
return;
int orl = vNewPM.size()-pos-1;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);

vector<vector<unsigned> > final;
GetFinal(vPM,vNewPM,pos,useds,final);

unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);

short* y = new short[ylen];
memset(y,0,ylen*2);
memcpy(y,m_Data,vNewPM.at(pos)*2);
vector<float> w;
OverlapAdd(final,y,ylen,w);
//ƽ»¬

if(pscale>1)
smooth(y,ylen,w);

m_Data=y;
m_uDataLen=ylen;

}

// Fills vPM with the index of every sample whose entry in flag is set,
// in ascending order. vPM is cleared first.
void CPsola::GetPitchMarks(vector<unsigned>& vPM)
{
    vPM.clear();
    // The destructor treats a NULL flag as possible; without a flag array
    // there are no pitch marks to report.
    if(flag == NULL)
        return;
    for(unsigned i=0;i<m_uDataLen;i++ )
    {
        if(!flag[i])
            continue;
#ifdef __DEBUG
        out<<i<<endl;
#endif
        vPM.push_back(i);
    }
}

// Reports whether the pitch entry covering sample index i is positive.
// Samples at or before m_dX1*m_uSamFre are judged by m_dPitch[0]; later
// samples map to entry 1 + (offset / samples-per-frame). An entry index at
// or past m_uPitchLen yields false.
bool CPsola::IsVowel(unsigned i)
{
    const bool atOrBeforeFirstFrame = (i <= m_dX1*m_uSamFre);
    if (atOrBeforeFirstFrame)
        return m_dPitch[0] > 0;

    const unsigned firstFrameStart = unsigned(m_dX1*m_uSamFre);
    const unsigned samplesPerFrame = unsigned(m_dFrameLen*m_uSamFre);
    const unsigned frame = (i - firstFrameStart)/samplesPerFrame + 1;

    return (frame < m_uPitchLen) && (m_dPitch[frame] > 0);
}

// Averages the distance vPM[i+1]-vPM[i] over every interior mark i
// (1 <= i <= size-2) for which IsVowel(vPM[i]) is true.
//   vPM : pitch-mark sample indices
//   pos : receives the index of the first such mark, or 0 if there is none
// Returns the integer average, or 0 when no mark qualified.
int CPsola::GetAvgPitchLen(vector<unsigned>& vPM,int& pos)
{
    unsigned sum=0;
    int count=0;
    pos = 0;
    // `i+1 < size()` rather than `i < size()-1`: the latter underflows to a
    // huge value for an empty vector and walks off the end of vPM.
    for(size_t i=1;i+1<vPM.size();i++)
    {
        if ( !IsVowel(vPM[i]) )
            continue;
        // i starts at 1, so pos==0 means "not found yet".
        if ( pos==0 )
            pos = static_cast<int>(i);
        sum += vPM[i+1] - vPM[i];
        count++;
    }
    if(count==0)
        return 0;
    return sum/count;
}


///////////////////////////////////////////////////////////////////////
// Function Name : PSOLA
// Function func :
// Author        : Taliux
// Create Time   : 2004/12/1
// Class Name    : CPsola
// input         : float tscale,float pscale
// output        :
////////////////////////////////////////////////////////////////////////
void CPsola::PSOLA(float tscale,float pscale,bool bFD = false)
{

if(tscale==1 && pscale==1)
return ;
// MarkPitch();
vector<unsigned> vPM,vNewPM;
GetPitchMarks(vPM);
vNewPM = vPM;
int pos;
unsigned i,j,T0,pshift=0;
if(pscale!=1)
{
for(i=1;i<vPM.size();i++)
{
T0=vPM.at(i)-vPM.at(i-1);

if (IsVowel(vPM.at(i-1)))
{
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));

}
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}

}
float new_tscale=tscale*(float)vPM.back()/(float)vNewPM.back(); //Éú³ÉеĻùÒô±ê×¢ÐòÁкó¶Ôʱ³¤Ð޸IJÎÊýµÄµ÷Õû

int avg = GetAvgPitchLen(vNewPM,pos);
if ( avg == 0 )
return;
int tot = (vNewPM.back()*new_tscale - vNewPM.at(pos)) / avg;
if ( tot <= 0 )
return;
int orl = vNewPM.size()-pos-1;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);

vector<vector<unsigned> > final;

GetFinal(vPM,vNewPM,pos,useds,final);

unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);

short* y = new short[ylen];
memset(y,0,ylen*2);
vector<float> w;
memcpy(y,m_Data,vNewPM.at(pos)*2);

float* pBeta = NULL;

if ( bFD)
{
pBeta = new float[final.size()];
for ( i = 0; i < final.size(); i++ )
pBeta[i] = pscale;
}

OverlapAdd(final,y,ylen,w,pBeta);

if ( pBeta ) delete [] pBeta;

//ƽ»¬
if(pscale>1)
smooth(y,ylen,w);

m_Data=y;
m_uDataLen=ylen;

}


void CPsola::PSOLA(float dur, float* pNewPitch, int nPitchLen, float dX1)
{
vector<unsigned> vPM,vNewPM;
int pos;
GetPitchMarks(vPM);
if(vPM.empty()) return;
vNewPM=vPM;
GetAvgPitchLen(vNewPM,pos);

unsigned i,j,k,T0,uvl,t=1;
int pshift=0;
float alpha,beta;

while ( pNewPitch[t] == 0 && t < nPitchLen ) t++;
uvl = (dX1+(t-1)*m_dFrameLen)*m_uSamFre;

int nvc = vNewPM.size() - pos -1;

beta = (float)uvl/(float)(vNewPM.at(pos));
// out<<"pos\t"<<pos<<"uvl\t"<<uvl<<endl;
vector<short> unvoiced(uvl);

//ÏßÐÔ²îÖµµ÷Õû¸¨Òô³¤¶È
for( i = 0; i < uvl; i++ )
{
j = i / beta;
alpha = (float)i/beta - j;
unvoiced.at(i) = (1-alpha)*m_Data[j]+alpha*m_Data[j+1];
}

pshift = uvl - vNewPM.at(pos);
vNewPM.at(pos) = uvl;
float pscale;
for ( i = pos+1; i < vPM.size(); i++ )
{
T0=vPM.at(i)-vPM.at(i-1);
k = (float)(i-pos)/(float)nvc * (nPitchLen-t) + t;
pscale = pNewPitch[k]/(m_uSamFre/(float)T0);
if ( pscale < 0.5 || pscale > 3 )
pscale = 1;
if (IsVowel(vPM.at(i-1)))
{
out<<pscale<<"\t";
if (pscale>1)
pshift=pshift-Approximate(T0*(pscale-1.0)/pscale); //»ùƵ¸Ä±ä¶ÔӦʱ³¤±ä»¯
else
pshift=pshift+Approximate(T0*(1.0/pscale-1.0));

}
// pshift += Approximate(1.0/pNewPitch[k]*m_uSamFre - T0);
vNewPM.at(i)=vPM.at(i)+pshift; //Éú³ÉÄ¿±ê»ùÒô±ê×¢ÐòÁÐ
}
out<<endl;
//ÎÒ²»ÏëÔÙÍæÁË£¡
int orl = nvc;
int avg = (vNewPM.back()-vNewPM.at(pos))/orl;
int tot = (dur*m_uSamFre-uvl)/avg;
out<<vNewPM.back()<<"\t"<<vNewPM.at(pos)<<endl;
vector<int> useds;
GetUseds(orl,tot,vNewPM.size()-2,useds);
out<<tot<<"\t"<<orl<<"\t"<<avg<<endl;
for(i=0;i<useds.size();i++) out<<useds[i]<<"\t";
out<<endl;

vector<vector<unsigned> > final;

GetFinal(vPM,vNewPM,pos,useds,final);
out<<"\t"<<final.size()<<endl;
unsigned ylen=final.back()[0]+(final.back()[2]-final.back()[1]);

short* y = new short[ylen];
memset(y,0,ylen*2);
memcpy(y,&unvoiced[0],unvoiced.size()*2);
vector<float> w;

float* pBeta = new float[final.size()];
for ( i = 0; i < final.size()-1; i++ )
pBeta[i] =
(float)(final.at(i)[2]-final.at(i)[1])/2/(float)(final.at(i+1)[0]-final.at(i)[0]);
pBeta[i] = pBeta[i-1];

OverlapAdd(final,y,ylen,w/*,pBeta*/);
delete[] pBeta;

smooth(y,ylen,w);

m_Data=y;
m_uDataLen=ylen;

}

// Decides how many times each pitch period is emitted so that the orl
// original periods become tot periods.
//   orl   : number of original (voiced) periods
//   tot   : number of periods wanted; negative values are treated as 0
//   size  : number of entries to produce in useds
//   useds : output; useds[i] is the repeat count of period i
// When stretching (tot > orl) every entry starts at tot/orl and the
// remainder is spread as +1 every orl/m entries, counted from the end.
// Otherwise every entry starts at 1 and orl-tot entries, spaced the same
// way, are set to 0.
void CPsola::GetUseds(int orl, int tot, int size, vector<int>& useds)
{
    // A non-positive size used to reach the vector constructor as a huge
    // unsigned count.
    if(size<=0)
    {
        useds.clear();
        return;
    }
    // Without original periods there is nothing to distribute; this also
    // avoids tot/orl and tot%orl dividing by zero.
    if(orl<=0)
    {
        useds.assign(size,1);
        return;
    }
    // A negative request made the step orl/m zero and indexed useds.at(size).
    if(tot<0)
        tot=0;

    const bool stretching = tot>orl;
    const int base = stretching ? tot/orl : 1;
    // Stretching: periods that get one extra copy.
    // Shrinking : how many pitch periods have to be cut.
    const int m = stretching ? tot%orl : orl-tot;

    useds.assign(size,base);
    if(m<=0)
        return;

    // Adjust one entry every `step` entries; step >= 1 because 0 < m <= orl.
    const int step = orl/m;
    for(int i=1;i<=m;i++)
    {
        if(i*step>=orl)
            break;
        const int idx = size-i*step;
        // orl may exceed size; stop instead of indexing before the start.
        if(idx<0)
            break;
        if(stretching)
            useds[idx]++;
        else
            useds[idx]=0;
    }
}

// Builds the list of frames to overlap-add. For every index i from pos up to
// useds.size()-1, useds[i] entries are appended to final, each holding
//   [0] the running output position (starts at vNewPM[pos] and advances by
//       vNewPM[i+1]-vNewPM[i] after every entry),
//   [1] vPM[i] and [2] vPM[i+2].
// final is cleared first.
void CPsola::GetFinal(vector<unsigned>& vPM,vector<unsigned>& vNewPM,
 int pos, vector<int>& useds, vector<vector<unsigned> >& final)
{
    final.clear();

    int cursor = vNewPM.at(pos);
    for (int idx = pos; idx < useds.size(); idx++)
    {
        const int repeats = useds.at(idx);
        for (int rep = 0; rep < repeats; ++rep)
        {
            final.push_back(vector<unsigned>(3));
            vector<unsigned>& entry = final.back();
            entry[0] = cursor;
            entry[1] = vPM.at(idx);
            entry[2] = vPM.at(idx+2);
            cursor = cursor + vNewPM.at(idx+1) - vNewPM.at(idx);
        }
    }
}

// Divides each of the first ylen samples of y by its weight in w.
// Weights below 0.1 are first overwritten with 1 (in w itself), so those
// samples pass through unchanged. w.at() throws if w has fewer than ylen
// entries.
void CPsola::smooth(short* y, unsigned ylen, vector<float>& w)
{
    unsigned n = 0;
    while ( n < ylen )
    {
        float& weight = w.at(n);
        if ( weight < 0.1 )
            weight = 1;
        y[n] = y[n] / weight;
        ++n;
    }
}

// Windows every frame listed in final and accumulates it into y.
//   final : one entry per frame; [0] = start index in y, [1]/[2] = begin/end
//           index of the source span in m_Data
//   y     : output buffer of ylen samples; frames are added onto its content
//   ylen  : number of samples in y
//   w     : reset to ylen zeros, then receives the summed window weights per
//           output sample (used by smooth() for normalisation)
//   pBeta : optional array with one factor per frame; when non-NULL each
//           frame is rebuilt through the m_filter LPC/FFT helpers with its
//           excitation spectrum resampled by that factor, and its length is
//           divided by it
void CPsola::OverlapAdd(vector<vector<unsigned> >& final, short* y, unsigned ylen,
vector<float>& w, float* pBeta)
{
    unsigned i,j;
    int k,kv;
    vector<short> frm;
    vector<float> wgt;
    float alpha,beta;
    w = vector<float>(ylen,0);
    int len;
    int numfrm = final.size();
    float re,te;
    int p=16;
    vector<float> a,r;
    vector<complex<float> > fft_spec,lpc_spec,exc_spec,new_exc;
    unsigned start;
    // Mirror pivot for the spectrum tail; note it is kept across frames.
    int temp = 0;
    for ( i = 0; i < numfrm; i++ )
    {
        start=final.at(i)[0];
        len=final.at(i)[2]-final.at(i)[1];
        // presumably fills wgt with a len-point Hanning window - confirm
        // against m_filter.
        m_filter.window(wgt,len,"hanning");
        frm=vector<short>(m_Data+final.at(i)[1],m_Data+final.at(i)[2]);

        // Apply the window to the source frame.
        for(j=0;j<frm.size();j++)
            frm.at(j) *= wgt.at(j);

        if ( pBeta != NULL )
        {
            m_filter.Wave2LPC(frm,a,r,p,&re,&te);
            m_filter.FFT(frm,fft_spec);
            m_filter.LPCSpec(a,fft_spec.size(),sqrt(re),lpc_spec);
            m_filter.ExcitationSpec(fft_spec,lpc_spec,exc_spec);
            new_exc.clear();
            new_exc.resize(exc_spec.size());
            beta = pBeta[i];
            // Factors outside [0.5, 2] are ignored. This used to read
            // `beta == 1;`, a comparison with no effect, so out-of-range
            // factors were applied unchanged.
            if (beta < 0.5 || beta > 2) beta = 1;
            for( k = 0; k < new_exc.size()/2; k++ )
            {
                // Resample the excitation spectrum by linear interpolation.
                kv = k / beta;
                alpha = (float)k/beta - kv;
                ///////////////////
                if(kv >= exc_spec.size()/2) //if beta<1 copy the tail of the spectrum
                {
                    // Mirror the already-filled bins around temp.
                    if ( temp == 0 ) temp = k-1;
                    if ( 2*temp-k < 0) temp = k-1;
                    new_exc.at(k) = new_exc.at(2*temp-k);
                }
                else
                    new_exc.at(k) = (1-alpha)*exc_spec.at(kv)+alpha*exc_spec.at(kv+1);
                // Fill the upper half as the complex conjugate of the lower.
                new_exc.at(new_exc.size()-1-k) = conj(new_exc.at(k));
            }

            // Recombine the LPC spectrum with the resampled excitation.
            for( k = 0; k < lpc_spec.size(); k++ )
                fft_spec.at(k) = lpc_spec.at(k)*new_exc.at(k);
            // Third argument true: presumably selects the inverse transform
            // - confirm against m_filter.FFT.
            m_filter.FFT(frm,fft_spec,true);
            len /= beta;
            frm.clear();
            frm.resize(len);

            for( k = 0 ; k < len; k++ )
                frm.at(k) = real(fft_spec.at(k));
            m_filter.window(wgt,len,"hanning");
        }

        // Add the frame and its window weights into the output, clipped to
        // the end of y.
        for(j=start;j<start+len && j<ylen;j++)
        {
            y[j]=y[j]+frm.at(j-start);
            w.at(j)=w.at(j)+wgt.at(j-start);
        }
    }
}