Sunday, 22 April 2012

Pitch Synchronous Overlap Add Method (PSOLA.CPP)


PSOLA.CPP

#include "../common/tdpsola.h"
#include "psola.h"

// Single file-scope CPSOLA object that every PSOLA_* wrapper in this file
// delegates to; option setters and modify calls all act on this one object.
// NOTE(review): it has external linkage under a very generic name -- consider
// making it `static` if no other translation unit refers to it. Thread-safety
// of concurrent PSOLA_* calls is not established by anything in this file.
CPSOLA instance;

void PSOLA_EnableCosineSmooth(bool enable)
{
    instance.enableCosineSmooth(enable);
}

void PSOLA_SetSpectralMapping(bool useBezier, int x1, int y1, int x2, int y2)
{
    instance.setSpectralMapping(useBezier, x1, y1, x2, y2);
}

bool PSOLA_IsCosineSmoothEnabled()
{
    return instance.isCosineSmoothEnabled();
}

void PSOLA_EnableVoicelessExtension(int method)
{
    instance.setVoicelessExtension(method);
}

int PSOLA_GetVoicelessExtension()
{
    return instance.getVoicelessExtension();
}

unsigned PSOLA_ModifyPitchContour(
                            const short *   srcWave,
                            unsigned        srcLength,
                            const unsigned *srcTags,
                            unsigned        tagNumber,
                            const unsigned *trgPeriods,
                            unsigned        periodNumber,
                            unsigned        trgDuration,
                            float           specRatio,
                            short *         trgWave,
                            unsigned        trgBufferLength,
                            unsigned        sampleRate)
{
    return instance.modifyPitchContour(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, trgPeriods, periodNumber, trgDuration, specRatio, sampleRate);
}

unsigned PSOLA_Modify(
                const short *   srcWave,
                unsigned        srcLength,
                const unsigned *srcTags,
                unsigned        tagNumber,
                unsigned        trgPitch,
                unsigned        trgDuration,
                float           specRatio,
                short *         trgWave,
                unsigned        trgBufferLength,
                unsigned        sampleRate)
{
    return instance.modify(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, trgPitch, trgDuration, specRatio, sampleRate);
}

unsigned PSOLA_ModifyRatio(
                     const short *      srcWave,
                     unsigned           srcLength,
                     const unsigned *   srcTags,
                     unsigned           tagNumber,
                     float              pitchRatio,
                     float              durationRatio,
                     float              specRatio,
                     short *            trgWave,
                     unsigned           trgBufferLength,
                     unsigned           sampleRate
                     )
{
    return instance.modifyRatio(srcWave, srcLength, srcTags, tagNumber, trgWave, trgBufferLength, pitchRatio, durationRatio, specRatio, sampleRate);
}









PSOLA.H


///
/// Modify wave using TP-PSOLA algorithm
///
/// @version 1.0.0
/// @author Jun Xu
/// @date 2007/07/18
///
#ifndef _CST_PSOLA_PSOLA_H_
#define _CST_PSOLA_PSOLA_H_

#ifndef PSOLA_EXPORTS
#   define PSOLA_DLL_EXPORTS __declspec(dllimport)
#   ifdef _DEBUG
#       pragma comment(lib, "psolad.lib")
#       pragma message("Linking with psolad.dll")
#   else
#       pragma comment(lib, "psola.lib")
#       pragma message("Linking with psola.dll")
#   endif
#else
# define PSOLA_DLL_EXPORTS __declspec(dllexport)
#endif

#ifdef _cplusplus
extern "C" {
#endif

    #define PSOLA_VLPPMETHOD_NONE   0   ///< ÇåÒô¶Î²»×ö»ùƵÀ©Õ¹
    #define PSOLA_VLPPMETHOD_FIXED  1   ///< ÇåÒô¶Î×ö¹Ì¶¨ÖÜÆڵĻùƵÀ©Õ¹
    #define PSOLA_VLPPMETHOD_EQUAL  2   ///< ÇåÒô¶Î×öÓëµÚÒ»¸öÖÜÆÚÏàµÈµÄµÈÖÜÆÚÀ©Õ¹
    #define PSOLA_VLPPMETHOD_PEAK   3   ///< ÇåÒô¶Î¸ù¾ÝÓïÒô¼â·åµãÀ´×öÖÜÆÚÀ©Õ¹
    #define PSOLA_VLPPMETHOD_AUTO   4   ///< ×Ô¶¯×öÇåÒôÀ©Õ¹£¨¸ù¾Ýʱ³¤Ð޸ıÈÀý¾ö¶¨£©
    #define PSOLA_VLPPMETHOD_MAX    4

    ///
    /// ÉèÖÃÐ޸Ĺý³ÌÖеÄƵÆ×Ó³É䷽ʽ£¬Ð§¹û²»ºÃ£¬É÷ÓÃ
    ///
    /// @param useBezier    true:ʹÓñ´Èû¶ûÇúÏß,false:ʹÓÃÕÛÏß
    /// @param x1,y1        µÚÒ»¸ö¿ØÖƵãµÄ×ø±ê
    /// @param x2,y2        µÚ¶þ¸ö¿ØÖƵãµÄ×ø±ê
    ///
    PSOLA_DLL_EXPORTS void PSOLA_SetSpectralMapping(bool useBezier, int x1, int y1, int x2, int y2);

    ///
    /// ÉèÖÃÇåÒô¶ÎÖÜÆÚÀ©Õ¹·½Ê½
    ///
    /// @param method       0-3£¬²Î¼ûÉÏÃæµÄºê¶¨Òå
    ///
    PSOLA_DLL_EXPORTS void PSOLA_EnableVoicelessExtension(int method);

    /// »ñÈ¡ÇåÒô¶ÎÖÜÆÚÀ©Õ¹·½Ê½
    PSOLA_DLL_EXPORTS int PSOLA_GetVoicelessExtension();

    ///
    /// ÆôÓÃÓàÏÒº¯Êý½øÐÐÆ´½Ó±ß½çƽ»¬
    ///
    /// @param enable       true:ÆôÓÃ,false:²»ÆôÓÃ
    ///
    PSOLA_DLL_EXPORTS void PSOLA_EnableCosineSmooth(bool enable);

    /// ÅжÏÓàÏұ߽çƽ»¬ÊÇ·ñ±»ÆôÓÃ
    PSOLA_DLL_EXPORTS bool PSOLA_IsCosineSmoothEnabled();

    ///
    /// Modify wave using PSOLA model
    /// ʹÓÃPSOLAÄ£ÐͽøÐÐÓïÒôÐ޸ģ¬Ö¸¶¨Ä¿±êµÄƽ¾ù»ùƵÖÜÆÚÒÔ¼°ÓïÒô³¤¶È
    ///
    /// @param srcWave[in]      wave buffer read from speech database
    ///                         ÓïÒôÊý¾Ý£¬±ØÐëΪ16bit²ÉÑù¾«¶È
    /// @param srcLength[in]    wave buffer length, in short count
    ///                         ÓïÒôÊý¾ÝµÄ²ÉÑùµã¸öÊý
    /// @param srcTags[in]      peak tags read from speech database
    ///                         each tag indicate the peak position offset to the first sample of wave
    ///                         ÓïÒôÊý¾ÝµÄ·åÖµµã±ê×¢Êý×é
    ///                         ÄÚ²¿±£´æÿ¸ö·åÖµµãÏà¶ÔÓïÒôÆðʼµãµÄÆ«ÒÆλÖÃ
    /// @param tagNumber[in]    peak tag count of srcTags
    ///                         ·åÖµ±ê×¢¸öÊý
    /// @param trgPitch[in]     predicted average pitch period
    ///                         trgPitch=0 means keeping pitch no change
    ///                         Ä¿±ê»ùƵÖÜÆڵĴóС£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param trgDuration[in]  predicted wave duration, in short
    ///                         Ä¿±êÓïÒô²ÉÑùµã¸öÊý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param specRatio [in]   modification ratio of spectra
    ///                         ƵÆ×Ð޸ıÈÀý£¬0Ϊ²»ÐÞ¸Ä
    /// @param trgWave[out]     modified wave, buffer should be allocated outside
    ///                         Ä¿±êÓïÒôÊý¾Ý»º³åÇø£¬ÓÉÍⲿ·ÖÅ䣬Îñ±Ø±ÈtrgDurationÒª´óһЩ
    /// @param sampleRate[in]   Sample count per second, default is 16000
    ///                         ²ÉÑùÂÊ£¬Ò»°ãÇëʹÓÃ16000
    ///
    /// @return                 true if modified successfully
    ///                         false if not, then the content of trgWave if un-defined
    ///
    PSOLA_DLL_EXPORTS unsigned PSOLA_Modify(
        const short *   srcWave,
        unsigned        srcLength,
        const unsigned *srcTags,
        unsigned        tagNumber,
        unsigned        trgPitch,
        unsigned        trgDuration,
        float           specRatio,
        short *         trgWave,
        unsigned        trgBufferLength,
        unsigned        sampleRate);

    ///
    /// Modify wave using PSOLA model
    /// ʹÓÃPSOLAÄ£ÐͽøÐÐÓïÒôÐ޸ģ¬Ö¸¶¨Ä¿±ê»ùƵÇúÏß
    ///
    /// @param srcWave[in]      wave buffer read from speech database
    ///                         ÓïÒôÊý¾Ý£¬±ØÐëΪ16bit²ÉÑù¾«¶È
    /// @param srcLength[in]    wave buffer length, in short count
    ///                         ÓïÒôÊý¾ÝµÄ²ÉÑùµã¸öÊý
    /// @param srcTags[in]      peak tags read from speech database
    ///                         each tag indicate the peak position offset to the first sample of wave
    ///                         ÓïÒôÊý¾ÝµÄ·åÖµµã±ê×¢Êý×é
    ///                         ÄÚ²¿±£´æÿ¸ö·åÖµµãÏà¶ÔÓïÒôÆðʼµãµÄÆ«ÒÆλÖÃ
    /// @param tagNumber[in]    peak tag count of srcTags
    ///                         ·åÖµ±ê×¢¸öÊý
    /// @param trgPeriods[in]   predicted pitch period
    ///                         Ä¿±ê»ùƵÖÜÆÚÊý×é
    /// @param periodNumber[in] pitch period count of target
    ///                         Ä¿±ê»ùƵÖÜÆÚÊýÄ¿
    /// @param trgDuration[in]  predicted wave duration, in short
    ///                         Ä¿±êÓïÒô²ÉÑùµã¸öÊý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param specRatio [in]   modification ratio of spectra
    ///                         ƵÆ×Ð޸ıÈÀý£¬0Ϊ²»ÐÞ¸Ä
    /// @param trgWave[out]     modified wave, buffer should be allocated outside
    ///                         Ä¿±êÓïÒôÊý¾Ý»º³åÇø£¬ÓÉÍⲿ·ÖÅ䣬Îñ±Ø±ÈtrgDurationÒª´óһЩ
    /// @param sampleRate[in]   Sample count per second, default is 16000
    ///                         ²ÉÑùÂÊ£¬Ò»°ãÇëʹÓÃ16000
    ///
    /// @return                 true if modified successfully
    ///                         false if not, then the content of trgWave if un-defined
    ///
    PSOLA_DLL_EXPORTS unsigned PSOLA_ModifyPitchContour(
        const short *   srcWave,
        unsigned        srcLength,
        const unsigned *srcTags,
        unsigned        tagNumber,
        const unsigned *trgPeriods,
        unsigned        periodNumber,
        unsigned        trgDuration,
        float           specRatio,
        short *         trgWave,
        unsigned        trgBufferLength,
        unsigned        sampleRate);

    ///
    /// Modify wave using PSOLA model
    /// ʹÓÃPSOLAÄ£ÐͽøÐÐÓïÒôÐ޸ģ¬Ö¸¶¨ÖÜÆÚ£¬Ê±³¤µÄÐ޸ıÈÀý
    ///
    /// @param srcWave[in]      wave buffer read from speech database
    ///                         ÓïÒôÊý¾Ý£¬±ØÐëΪ16bit²ÉÑù¾«¶È
    /// @param srcLength[in]    wave buffer length, in short count
    ///                         ÓïÒôÊý¾ÝµÄ²ÉÑùµã¸öÊý
    /// @param srcTags[in]      peak tags read from speech database
    ///                         each tag indicate the peak position offset to the first sample of wave
    ///                         ÓïÒôÊý¾ÝµÄ·åÖµµã±ê×¢Êý×é
    ///                         ÄÚ²¿±£´æÿ¸ö·åÖµµãÏà¶ÔÓïÒôÆðʼµãµÄÆ«ÒÆλÖÃ
    /// @param tagNumber[in]    peak tag count of srcTags
    ///                         ·åÖµ±ê×¢¸öÊý
    /// @param pitchRatio[in]   modification ratio of pitch
    ///                         Ä¿±ê»ùƵÖÜÆÚÐ޸ıÈÀý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param durationRatio[in]modification ratio of duration
    ///                         Ä¿±êÓïÒôʱ³¤Ð޸ıÈÀý£¬Èç¹ûΪ0Ôò±íʾ²»½øÐÐÐÞ¸Ä
    /// @param specRatio [in]   modification ratio of spectra
    ///                         ƵÆ×Ð޸ıÈÀý£¬0Ϊ²»ÐÞ¸Ä
    /// @param trgWave[out]     modified wave, buffer should be allocated outside
    ///                         Ä¿±êÓïÒôÊý¾Ý»º³åÇø£¬ÓÉÍⲿ·ÖÅ䣬Îñ±Ø±ÈtrgDurationÒª´óһЩ
    /// @param sampleRate[in]   Sample count per second, default is 16000
    ///                         ²ÉÑùÂÊ£¬Ò»°ãÇëʹÓÃ16000
    ///
    /// @return                 true if modified successfully
    ///                         false if not, then the content of trgWave if un-defined
    ///
    PSOLA_DLL_EXPORTS unsigned PSOLA_ModifyRatio(
        const short *   srcWave,
        unsigned        srcLength,
        const unsigned *srcTags,
        unsigned        tagNumber,
        float           pitchRatio,
        float           durationRatio,
        float           specRatio,
        short *         trgWave,
        unsigned        trgBufferLength,
        unsigned        sampleRate);

#ifdef _cplusplus
}
#endif

#endif


No comments:

Post a Comment