【yyddjtc】WSOLA算法--python实现

【yyddjtc】WSOLA算法--python实现,第1张

import librosa
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np
from playsound import playsound
# Load the input recording; librosa.load returns the audio time series (y)
# and the sampling rate (sr) that the rest of the script works with.
y,sr = librosa.load('D:\\pythonProject\\harvard.wav')

def wsola(y, sr, rate, shiftm):
    """Change the speed of a mono signal without changing its pitch (WSOLA).

    The output is assembled from Hann-windowed frames that overlap by half a
    frame.  For every output hop the next input frame is taken near the
    nominal position ``previous + hop * rate``, shifted by up to one hop
    backwards to the offset whose waveform best matches (largest
    cross-correlation) the natural continuation of the frame written last.

    Args:
        y: One-dimensional audio time series.
        sr: Sampling rate of ``y`` in Hz.
        rate: Speed factor; values above 1 shorten (speed up) the signal,
            values below 1 lengthen (slow down) it.
        shiftm: Synthesis hop in milliseconds; the frame length is twice
            this hop.

    Returns:
        A float64 array of ``floor(len(y) / rate)`` samples.  The first hop
        is copied from ``y`` unchanged; samples at the very end that no
        complete frame reaches are left at zero.

    Raises:
        ValueError: If ``y`` is not one-dimensional, ``rate`` is not
            positive, or the hop (or the hop scaled by ``rate``) is shorter
            than one sample.
    """
    y = np.asarray(y, dtype=np.float64)
    if y.ndim != 1:
        raise ValueError("y must be a one-dimensional (mono) signal")
    if rate <= 0:
        raise ValueError("rate must be positive")

    hop_exact = sr * shiftm / 1000
    s = int(hop_exact)                 # synthesis hop in samples
    epstep = int(hop_exact * rate)     # analysis hop in samples
    if s < 1 or epstep < 1:
        # A zero hop would never advance through the input (endless loop).
        raise ValueError("shiftm and rate must give hops of at least one sample")

    wlen = len(y)
    out_len = int(np.floor(wlen / rate))
    wsolaed = np.zeros(out_len)

    # Periodic Hann window: its two halves sum to exactly one, so the
    # overlap-add does not modulate the amplitude.
    win = np.hanning(2 * s + 1)[:2 * s]
    fade_in = win[:s]
    fade_out = win[s:]

    # The first hop is passed through unmodified (clipped for short input).
    head = min(s, wlen, out_len)
    wsolaed[:head] = y[:head]

    sp = int(hop_exact * 2)    # start of the frame half that is fading out
    ep = sp + epstep           # nominal centre of the next input frame
    out_pos = s                # next write position in the output

    # Stop when either the input or the output buffer cannot hold another
    # complete frame; integer truncation of the hops can make one run out
    # slightly before the other.
    while wlen > ep + 2 * s and out_pos + s <= out_len:
        # Natural continuation of the previously written frame.
        ref = y[sp:sp + 2 * s]
        # Search region holding every candidate frame for offsets -s..0.
        buff = y[ep - 2 * s:ep + s]

        # corr[i] is the cross-correlation of ref with the candidate frame
        # starting at buff[i]; each candidate is scored independently.
        corr = np.correlate(buff, ref, mode="valid")
        delta = int(np.argmax(corr)) - s
        epd = ep + delta

        # Cross-fade: right half of the old frame plus left half of the
        # best-matching new frame.
        wsolaed[out_pos:out_pos + s] = (
            y[sp:sp + s] * fade_out + y[epd - s:epd] * fade_in
        )

        sp = epd
        ep += epstep
        out_pos += s
    return wsolaed

# Store the WSOLA time-scaled audio on disk and play it back.
c = wsola(y, sr, 1.25, 10)
wsola_wav = "D:\\pythonProject\\harvardc.wav"
sf.write(wsola_wav, c, sr)
playsound(wsola_wav)

# For comparison: change the speed by writing the untouched samples with a
# scaled sampling rate instead.
sr1 = round(sr * 1.25)
resampled_wav = "D:\\pythonProject\\harvardd.wav"
sf.write(resampled_wav, y, sr1)
playsound(resampled_wav)

函数 wsola 的输入依次为:音频时间序列 y、采样率 sr、变速倍率 rate(大于 1 为加速,小于 1 为减速)以及以毫秒为单位的帧移 shiftm(帧长为帧移的两倍);输出为变速后的音频。与通过改变采样率(重采样方式)得到的变速音频相比,WSOLA 算法得到的音频不会变调。

整理自:《WSOLA原理及matlab仿真》(ManiacLook 的博客,CSDN):https://blog.csdn.net/ManiacLook/article/details/119673630

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/langs/868305.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-13
下一篇 2022-05-13

发表评论

登录后才能评论

评论列表(0条)

保存