% Spectral-subtraction speech enhancement demo.
%
% Reads a recording, adds white Gaussian noise, and denoises it frame by
% frame (Hamming window, 50 % overlap) in two ways:
%   enhanced - basic power spectral subtraction:      |Y|^2 - |N|^2
%   improved - generalised over-subtraction:          |Y|^a - b*|N|^a
% Negative results are clamped to zero, the noisy phase is reused, and the
% frames are overlap-added. The accumulated window gain is divided out at
% the end, and the SNR of each signal is reported in the figure titles.

% NOTE(review): winsize was used but never defined in the original listing;
% 256 is an assumed frame length - confirm against the intended setup.
winsize = 256;          % frame length in samples (must be even, hop = winsize/2)
n = 0.1;                % noise level (scale applied to randn)
a = 2;                  % spectral exponent of the generalised subtraction
b = 6;                  % over-subtraction factor applied to the noise estimate

% Read the wav file. wavread is unavailable in current MATLAB releases, so
% audioread is preferred whenever it is available.
wavfile = 'E:\matlab\louyin.wav';
if exist('audioread', 'file')
    [speech, fs] = audioread(wavfile);
else
    [speech, fs] = wavread(wavfile);
end

% Average the channels (a no-op for a mono column) and work on a row vector,
% so a stereo file no longer breaks the row-wise arithmetic below.
speech = mean(speech, 2).';

% Signal length; named len so the builtin size() is not shadowed.
len = length(speech);
numofwin = floor(len / winsize);    % number of whole, non-overlapping windows
if numofwin < 1
    error('Recording is shorter than one frame (%d samples).', winsize);
end
hop = winsize / 2;                  % frame shift (50 % overlap)

% Hamming window and overlap-add accumulators.
ham = hamming(winsize)';
hamwin = zeros(1, len);             % summed window weight at each sample
enhanced = zeros(1, len);
improved = zeros(1, len);

% Generate the noise signal and the noisy speech.
noise = n * randn(1, len);
y = speech + noise;

% Noise estimate: magnitude spectrum of an independent noise frame.
% NOTE(review): this frame is not windowed, unlike the signal frames below.
noisy = n * randn(1, winsize);
npow = abs(fft(noisy));
Pn = npow.^2;                       % noise power spectrum (basic method)
Pnn = npow.^a;                      % noise spectrum raised to a (improved method)

for q = 1:2*numofwin-1
    idx = (1:winsize) + (q-1)*hop;  % sample indices of frame q
    yframe = y(idx);
    hamwin(idx) = hamwin(idx) + ham;

    % FFT of the windowed noisy frame.
    y1 = fft(yframe .* ham);
    ypow = abs(y1);                 % magnitude
    yangle = angle(y1);             % phase, reused for reconstruction

    % Basic spectral subtraction, clamped at zero.
    Ps = max(ypow.^2 - Pn, 0);
    s = sqrt(Ps) .* exp(1j*yangle);

    % Over-subtraction with exponent a and factor b, clamped at zero.
    Pss = max(ypow.^a - b*Pnn, 0);
    ss = Pss.^(1/a) .* exp(1j*yangle);

    % Inverse FFT and overlap-add of the denoised frames.
    enhanced(idx) = enhanced(idx) + real(ifft(s));
    improved(idx) = improved(idx) + real(ifft(ss));
end

% Remove the gain introduced by the overlapping Hamming windows; samples
% that no frame covered stay at zero.
covered = hamwin ~= 0;
enhanced(covered) = enhanced(covered) ./ hamwin(covered);
improved(covered) = improved(covered) ./ hamwin(covered);
enhanced(~covered) = 0;
improved(~covered) = 0;

% Signal-to-noise ratios. SNR1 uses the noise that was actually added.
SNR1 = 10*log10(var(speech) / var(noise));               % noisy speech
SNR2 = 10*log10(var(speech) / var(enhanced - speech));   % basic subtraction
SNR3 = 10*log10(var(speech) / var(improved - speech));   % over-subtraction

figure(1); plot(speech);            % original waveform
title(['Original Voice(n=',num2str(n),')']);
figure(2); plot(y);
title(['Noise Added(SNR=',num2str(SNR1),'dB)']);
figure(3); plot(enhanced);
title(['Enhanced Voice(SNR=',num2str(SNR2),'dB)']);
figure(4); plot(improved);
title(['Improved Voice(SNR=',num2str(SNR3),'dB)']);
%原来想注释完,有事要出去,基本的思想已写完,后面的看程序吧。不外乎是:分帧、加窗、估计噪音、傅里叶变换、反傅里叶变换等。
%输入参数:s 语音数据,fs 采样频率,p 下面有说明,共11个,可不输入,有默认值
%“过度减法(oversubtraction)”:作减法的时候,保留一小部分原来的背景噪音,用这部分背景噪音来掩盖住音乐噪音的谱峰,从而消除了令人不悦的音乐噪音。
%通过给的参数p,估计噪音,做谱减法。从而消除噪音。
function [ss,po]=specsubm(s,fs,p)
%SPECSUBM Speech enhancement by spectral subtraction [SS,PO]=SPECSUBM(S,FS,P)
%
% implementation of spectral subtraction algorithm by R Martin (rather slow)
%
% Inputs:
%   s   speech samples (any vector orientation; processed as a column)
%   fs  sampling frequency in Hz
%   p   optional vector of the 11 algorithm parameters listed below; the
%       defaults are used when it is omitted or empty
%
% Outputs:
%   ss  enhanced signal, column vector with the same length as s
%   po  parameter vector that was actually used
%
% With no output arguments the input followed by the enhanced signal is
% played through soundsc.
%
% algorithm parameters: t* in seconds, f* in Hz, k* dimensionless
% 1: tg = smoothing time constant for signal power estimate (0.04): high=reverberant, low=musical
% 2: ta = smoothing time constant for signal power estimate
%         used in noise estimation (0.1)
% 3: tw = fft window length (0.032) (will be rounded up to 2^nw samples)
% 4: tm = length of minimum filter (1.5): high=slow response to noise increase, low=distortion
% 5: to = time constant for oversubtraction factor (0.08)
% 6: fo = oversubtraction corner frequency (400): high=distortion, low=musical
%         NOTE(review): the original header said 800 but the default vector
%         in the code uses 400 - confirm which value is intended.
% 7: km = number of minimisation buffers to use (4): high=waste memory, low=noise modulation
% 8: ks = oversampling constant (4)
% 9: kn = noise estimate compensation (1.5)
% 10:kf = subtraction floor (0.02): high=noisy, low=musical
% 11:ko = oversubtraction scale factor (4): high=distortion, low=musical
%
% NOTE(review): rfft and irfft are not defined in this listing; they must be
% available on the path (presumably the real-input FFT helpers that ship
% with the original implementation - confirm).

% Use the default parameters unless the caller supplied a vector.
if nargin<3 || isempty(p)
    po=[0.04 0.1 0.032 1.5 0.08 400 4 4 1.5 0.02 4].';
else
    po=p;
end

s=s(:);                                   % force a column so s(idx).*win is element-wise
ns=length(s);
ss=zeros(ns,1);

ni=pow2(nextpow2(fs*po(3)/po(8)));        % frame increment in samples (power of 2)
ti=ni/fs;                                 % frame increment in seconds
nw=ni*po(8);                              % window length in samples
nf=1+floor((ns-nw)/ni);                   % number of frames (<=0 leaves ss all zero)
nm=ceil(fs*po(4)/(ni*po(7)));             % frames accumulated per minimisation buffer

% Scaled Hamming window of length nw (one extra point generated, last dropped).
win=0.5*hamming(nw+1)/1.08;
win(end)=[];

% Per-frame smoothing coefficients derived from the time constants.
zg=exp(-ti/po(1));
za=exp(-ti/po(2));
zo=exp(-ti/po(5));

px=zeros(1+nw/2,1);                       % smoothed signal power (time constant tg)
pxn=px;                                   % smoothed signal power used for noise tracking (ta)
os=px;                                    % smoothed oversubtraction factor
mb=ones(1+nw/2,po(7))*nw/2;               % minimisation buffers, one column each
im=0;                                     % frame counter within the current buffer
osf=po(11)*(1+(0:nw/2).'*fs/(nw*po(6))).^(-1);  % frequency-dependent oversubtraction scale

for is=1:nf
    idx=(1:nw)+(is-1)*ni;
    x=rfft(s(idx).*win);
    x2=x.*conj(x);                        % power spectrum of this frame

    pxn=za*pxn+(1-za)*x2;
    im=rem(im+1,nm);
    if im
        % Still filling the current buffer: keep the running minimum.
        mb(:,1)=min(mb(:,1),pxn);
    else
        % Start a new buffer and discard the oldest one.
        mb=[pxn,mb(:,1:po(7)-1)];
    end
    pn=po(9)*min(mb,[],2);                % noise estimate: compensated minimum over all buffers

    % os = oversubtraction factor
    os=zo*os+(1-zo)*(1+osf.*pn./(pn+pxn));

    px=zg*px+(1-zg)*x2;
    % Spectral gain, bounded below by the subtraction floor term.
    q=max(po(10)*sqrt(pn./x2),1-sqrt(os.*pn./px));
    ss(idx)=ss(idx)+irfft(x.*q);          % overlap-add the enhanced frame
end

if nargout==0
    % The original line referenced an undefined variable "sss"; restored as
    % the input followed by the output - TODO confirm.
    soundsc([s; ss],fs);
end
end
% Basic spectral subtraction demo.
%
% Reads a recording, mixes it down to mono, adds uniform random noise,
% splits it into overlapping Hamming-windowed frames, subtracts a noise
% power estimate taken from frames 2-5 from every frame's power spectrum,
% resynthesises the frames with the noisy phase, joins them by averaging
% the overlapping parts, then plots and plays the noisy and processed
% signals.
clear;

% Alternative input recordings (kept from the original listing):
%[xx,fs]=wavread('E:\mywhisper\shu.wav')
%[xx,fs]=wavread('E:\speech\x\w1xun_01.wav')
%[xx,fs]=wavread('E:\speech\耳悔虚语音切割\b\w1ba_5')

% Read the audio file: xx receives the samples, fs the sampling rate.
% wavread is unavailable in current MATLAB releases, so audioread is
% preferred whenever it is available.
wavfile = 'D:\yuan.wav';
if exist('audioread', 'file')
    [xx, fs] = audioread(wavfile);
else
    [xx, fs] = wavread(wavfile);
end

% A two-column (stereo) signal is converted to mono by averaging the two
% channels; anything else is used as is.
row = size(xx, 2);
if row == 2
    x = (xx(:,1) + xx(:,2)) / 2;
else
    x = xx;
end

% Remove the mean and add uniform noise in [0, 0.1).
x = x - mean(x) + 0.1*rand(length(x), 1);

N = length(x);                      % signal length
n = 220;                            % frame length
n1 = 160;                           % overlap between consecutive frames
hop = n - n1;                       % frame advance
frame = floor((N - n) / hop);       % number of frames
%frame=floor(N/n)
if frame < 5
    % Frames 2-5 are used for the noise estimate below.
    error('Signal too short: %d frames available, at least 5 required.', frame);
end

% Window every frame and store its power spectrum and phase.
win = hamming(n);
nen = zeros(frame, n);              % per-frame power spectrum
ang = zeros(frame, n);              % per-frame phase
for i = 1:frame
    first = (i-1)*hop + 1;
    y1 = x(first:first+n-1) .* win;
    fy = fft(y1, n);
    nen(i,:) = (abs(fy).^2).';
    ang(i,:) = angle(fy).';
end

% Noise power estimate: mean power over all bins of frames 2-5.
yuzhi = sum(sum(nen(2:5,:))) / (4*n);

% Subtract the estimate and clamp negative power to zero.
nen = max(nen - yuzhi, 0);
% Alternative processing kept from the original listing:
%chuli=nen(i,1:n/2)
%chuli=chuli-yuzhi
%chuli(find(chuli<0))=0
%nen(i,:)=[chuli,fliplr(chuli)]
% nen(i,:)=filter(1,[0.5 0.5],nen(i,:))
% nen(i,find(nen(i,:)<0))=0

% Back to magnitude, recombine with the noisy phase, inverse FFT, and undo
% the window. The division must be element-wise: with "/" MATLAB performs
% a matrix right-division and returns a single scalar per frame.
nen = sqrt(nen);
out = zeros(frame, n);
for i = 1:frame
    jie = nen(i,:) .* exp(1j*ang(i,:));
    out(i,:) = real(ifft(jie)) ./ win.';
end

% Join the frames: the first frame is copied whole; for each later frame
% the overlapping n1 samples are averaged with what is already there and
% the remaining samples are appended.
zong = zeros(hop*(frame-1) + n, 1);
zong(1:n) = out(1,:).';
jiewei = n;                         % index of the last sample written so far
for i = 2:frame
    ov = jiewei-n1+1 : jiewei;
    zong(ov) = (zong(ov) + out(i,1:n1).') / 2;
    zong(jiewei+1 : jiewei+hop) = out(i,n1+1:end).';
    jiewei = jiewei + hop;          % advance to the end of the new frame
end
% Alternative joins kept from the original listing:
%zong=out(1,:)'
%for i=2:frame
%zong=[zong;out(i,:)']
%end
%
%for i=1:frame
% zong=[zong,nen(i,:)']
% zong(i*(n-n1)+1:(i-1)*(n-n1)+n)=zong(i*(n-n1)+1:(i-1)*(n-n1)+n)/2

% Figure 1: noisy signal, waveform on top and spectrogram below.
figure(1);
subplot(211);
plot(x);
axis([1, (n-n1)*frame+n, min(x), max(x)]);
subplot(212);
% specgram takes (signal, nfft, Fs, window, noverlap); the original call
% passed fs and 1024 in swapped positions.
specgram(x, 1024, fs, n, n1);

% Figure 2: processed signal, same layout.
figure(2);
subplot(211);
plot(zong);
axis([1, (n-n1)*frame+n, min(zong), max(zong)]);
subplot(212);
specgram(zong, 1024, fs, n, n1);

% Play the noisy signal, then the processed one. wavplay is unavailable in
% current MATLAB releases; fall back to a blocking audioplayer so the two
% clips still play one after the other.
if exist('wavplay', 'file')
    wavplay(x, fs);
    wavplay(zong, fs);
else
    playblocking(audioplayer(x, fs));
    playblocking(audioplayer(zong, fs));
end
%欢迎分享,转载请注明来源:内存溢出
%评论列表(0条)