(1)端点检测部分(vad):
function [x1,x2] = vad(x)
%VAD Two-threshold endpoint detection using short-time energy and zero-crossing rate.
%   [x1, x2] = VAD(x) normalises the signal x, splits it into overlapping
%   frames, and runs a small state machine over the per-frame energy and
%   zero-crossing rate to locate one speech segment.
%
%   x1 is the index of the first speech frame and x2 the index of the last
%   one. Both are frame indices (multiply by the frame shift, 80, to get a
%   sample position). x2 can be fractional because half of the trailing
%   silence count is subtracted from the segment length. If no frame ever
%   exceeds the high energy threshold, x1 stays 0.
%
%   The function also draws three stacked plots (waveform, energy,
%   zero-crossing rate) with the detected endpoints marked in red.
%
%   Requires ENFRAME (not defined in this listing).

% Normalise the amplitude of x to [-1, 1].
x = double(x);
x = x / max(abs(x));

% Constants.
FrameLen = 240;     % frame length in samples
FrameInc = 80;      % frame shift in samples
amp1 = 10;          % high short-time-energy threshold
amp2 = 2;           % low short-time-energy threshold
zcr1 = 10;          % high zero-crossing threshold (not used by the state machine below)
zcr2 = 5;           % low zero-crossing threshold
maxsilence = 3;     % silence limit in frames (original note: 3*10ms = 30ms)
minlen = 15;        % minimum speech length in frames (original note: 15*10ms = 150ms)
status = 0;         % state: 0 silence, 1 maybe speech, 2 speech, 3 finished
count = 0;          % accumulated speech frames
silence = 0;        % accumulated silence frames inside a speech segment

% Zero-crossing rate: frame the signal and its one-sample shift so that
% each element pair is two adjacent samples.
tmp1 = enframe(x(1:end-1), FrameLen, FrameInc);
tmp2 = enframe(x(2:end), FrameLen, FrameInc);
signs = (tmp1.*tmp2) < 0;      % adjacent samples have opposite sign
diffs = (tmp1 - tmp2) > 0.02;  % signed difference exceeds the empirical threshold 0.02
zcr = sum(signs.*diffs, 2);

% Short-time energy: sum of absolute values of the pre-emphasised frames.
amp = sum(abs(enframe(filter([1 -0.9375], 1, x), FrameLen, FrameInc)), 2);

% Lower the energy thresholds for quiet recordings.
amp1 = min(amp1, max(amp)/4);
amp2 = min(amp2, max(amp)/8);

% Endpoint detection state machine.
x1 = 0;
x2 = 0;
for n = 1:length(zcr)
    switch status
        case {0,1}  % 0 = silence, 1 = possible start
            if amp(n) > amp1
                % Definitely inside speech: back-date the start by the
                % frames already counted in the "possible" state.
                x1 = max(n-count-1, 1);
                status = 2;
                silence = 0;
                count = count + 1;
            elseif amp(n) > amp2 || zcr(n) > zcr2
                % Possibly inside speech.
                status = 1;
                count = count + 1;
            else
                % Silence.
                status = 0;
                count = 0;
            end
        case 2      % 2 = speech segment
            if amp(n) > amp2 || zcr(n) > zcr2
                % Still in speech.
                count = count + 1;
            else
                % Speech may be ending.
                silence = silence + 1;
                if silence < maxsilence
                    % Silence not long enough yet; keep counting.
                    count = count + 1;
                elseif count < minlen
                    % Segment too short: treat it as noise and start over.
                    status = 0;
                    silence = 0;
                    count = 0;
                else
                    % Speech has ended.
                    status = 3;
                end
            end
        case 3
            break
    end
end

% Drop half of the trailing silence from the segment length.
count = count - silence/2;
x2 = x1 + count - 1;

% Waveform with endpoints (converted from frames to samples).
subplot(311)
plot(x)
axis([1 length(x) -1 1])
xlabel('语音信号')
line([x1*FrameInc x1*FrameInc], [-1,1], 'Color', 'red')
line([x2*FrameInc x2*FrameInc], [-1,1], 'Color', 'red')

% Short-time energy with endpoints.
subplot(312)
plot(amp)
axis([1 length(amp) 0 max(amp)])
xlabel('短时能量')
line([x1,x1], [min(amp),max(amp)], 'Color', 'red')
line([x2,x2], [min(amp),max(amp)], 'Color', 'red')

% Zero-crossing rate with endpoints.
subplot(313)
plot(zcr)
axis([1 length(zcr) 0 max(zcr)])
xlabel('过零率')
line([x1,x1], [min(zcr),max(zcr)], 'Color', 'red')
line([x2,x2], [min(zcr),max(zcr)], 'Color', 'red')
(2)MFCC部分:
function ccc = mfcc(x)
%MFCC Mel-frequency cepstral coefficients plus first-order deltas.
%   ccc = MFCC(x) returns one row per frame with 24 columns: 12 liftered
%   cepstral coefficients followed by their 12 first-order differences.
%   Frames are 256 samples long with a shift of 80 samples. The first and
%   last two frames are removed because their deltas are not computed.
%
%   Requires MELBANKM and ENFRAME (not defined in this listing) and HAMMING.

% Normalised mel filter bank (original note: 24 filters, 8000 Hz sampling).
bank = melbankm(24, 256, 8000, 0, 0.5, 'm');
bank = full(bank);
bank = bank / max(bank(:));

% DCT coefficients, 12 x 24.
dctcoef = zeros(12, 24);
n = 0:23;
for k = 1:12
    dctcoef(k,:) = cos((2*n+1)*k*pi/(2*24));
end

% Normalised cepstral lifter window.
w = 1 + 6 * sin(pi * (1:12) ./ 12);
w = w / max(w);

% Pre-emphasis filter. The coefficients must be the two-element vector
% [1 -0.9375]; written without the space it collapses to the scalar 0.0625.
xx = double(x);
xx = filter([1 -0.9375], 1, xx);

% Split into frames: frame length 256, frame shift 80.
xx = enframe(xx, 256, 80);

% MFCC of every frame.
win = hamming(256);
m = zeros(size(xx,1), 12);
for i = 1:size(xx,1)
    y = xx(i,:);
    s = y' .* win;                              % Hamming window
    t = abs(fft(s));                            % magnitude spectrum
    t = t.^2;                                   % power spectrum
    c1 = dctcoef * log(bank * t(1:129));        % log mel energies -> cepstrum
    c2 = c1 .* w';                              % apply the lifter
    m(i,:) = c2';
end

% First-order difference coefficients (rows 1-2 and the last two stay zero).
dtm = zeros(size(m));
for i = 3:size(m,1)-2
    dtm(i,:) = -2*m(i-2,:) - m(i-1,:) + m(i+1,:) + 2*m(i+2,:);
end
dtm = dtm / 3;

% Concatenate static and delta coefficients.
ccc = [m dtm];

% Drop the first and last two frames, whose deltas are zero.
ccc = ccc(3:size(m,1)-2,:);
(3)dtw计算部分:
function dist = dtw2(test, ref)
%DTW2 Dynamic time warping distance restricted to a slope-constrained band.
%   dist = DTW2(test, ref) compares two feature matrices that have one
%   frame per row and the same number of columns. Only cells between
%   the lines of slope 1/2 and 2 through the two corners are evaluated.
%   The result is the accumulated squared Euclidean distance at the last
%   reference frame, or REALMAX when the lengths differ too much for any
%   path to exist.

n = size(test, 1);   % number of test frames
m = size(ref, 1);    % number of reference frames

% If the two templates differ too much in length, matching fails.
if (2*m-n < 3) || (2*n-m < 2)
    dist = realmax;
    return
end

% Accumulated-distance column; only the first reference frame is a valid start.
D = ones(m,1) * realmax;
D(1) = 0;

% Test-frame indices at which the band's upper / lower boundary changes slope.
xa = round((2*m-n)/3);
xb = round((2*n-m)*2/3);

if xb > xa
    % Three regions: 1:xa, xa+1:xb, xb+1:n
    for x = 1:xa
        D = warp(D, test, ref, x, round(0.5*x), 2*x);
    end
    for x = (xa+1):xb
        D = warp(D, test, ref, x, round(0.5*x), round(0.5*(x-n)+m));
    end
    for x = (xb+1):n
        D = warp(D, test, ref, x, round(2*(x-n)+m), round(0.5*(x-n)+m));
    end
elseif xa > xb
    % Three regions: 1:xb, xb+1:xa, xa+1:n
    for x = 1:xb
        D = warp(D, test, ref, x, round(0.5*x), 2*x);
    end
    for x = (xb+1):xa
        D = warp(D, test, ref, x, round(2*(x-n)+m), 2*x);
    end
    for x = (xa+1):n
        D = warp(D, test, ref, x, round(2*(x-n)+m), round(0.5*(x-n)+m));
    end
else
    % xa == xb: two regions: 1:xa, xa+1:n
    for x = 1:xa
        D = warp(D, test, ref, x, round(0.5*x), 2*x);
    end
    for x = (xa+1):n
        D = warp(D, test, ref, x, round(2*(x-n)+m), round(0.5*(x-n)+m));
    end
end

% Matching score.
dist = D(m);

function D = warp(D, t, r, x, y_min, y_max)
%WARP Advance the accumulated-distance column by one test frame.
%   D = WARP(D, t, r, x, y_min, y_max) updates entries y_min..y_max of D
%   for test frame x. Each updated entry is the squared distance between
%   t(x,:) and r(y,:) plus the smallest of the previous column's entries at
%   y, y-1 and y-2. Entries outside y_min..y_max are left unchanged.
prev = D;   % previous column; reads must not see this step's writes
for y = y_min:y_max
    D1 = prev(y);
    if y > 1
        D2 = prev(y-1);
    else
        D2 = realmax;
    end
    if y > 2
        D3 = prev(y-2);
    else
        D3 = realmax;
    end
    D(y) = sum((t(x,:) - r(y,:)).^2) + min([D1, D2, D3]);
end
(4)测试函数testdtw部分:
% testdtw: build MFCC templates for ten reference and ten test recordings,
% compare every test template against every reference template with dtw2,
% and print the index of the closest reference for each test recording.

% wavread is absent from current MATLAB releases; prefer audioread when it exists.
useAudioread = exist('audioread', 'file') ~= 0;

disp('正在计算参考模板的参数...')
for i = 1:10
    fname = sprintf('G:\\石东东\\语音\\%da.wav', i-1);
    if useAudioread
        x = audioread(fname);
    else
        x = wavread(fname);
    end
    [x1, x2] = vad(x);
    m = mfcc(x);
    % mfcc drops its first two frames, hence the shift by 2. Clamp the
    % range so an endpoint near either edge cannot index outside m.
    lo = max(x1-2, 1);
    hi = min(x2-2, size(m,1));
    m = m(lo:hi,:);
    ref(i).mfcc = m;
end

disp('正在计算测试模板的参数...')
for i = 1:10
    fname = sprintf('G:\\石东东\\语音\\%db.wav', i-1);
    if useAudioread
        x = audioread(fname);
    else
        x = wavread(fname);
    end
    [x1, x2] = vad(x);
    m = mfcc(x);
    % Same frame-offset correction and clamping as for the references.
    lo = max(x1-2, 1);
    hi = min(x2-2, size(m,1));
    m = m(lo:hi,:);
    test(i).mfcc = m;
end

disp('正在进行模板匹配...')
dist = zeros(10,10);
for i = 1:10
    for j = 1:10
        dist(i,j) = dtw2(test(i).mfcc, ref(j).mfcc);
    end
end

disp('正在计算匹配结果...')
for i = 1:10
    % Column index of the smallest distance = best-matching reference.
    [minDist, j] = min(dist(i,:));
    fprintf('测试模板%d的识别结果为:%d\n', i-1, j-1)
end
function mfc=mfcc(x)
%MFCC Extract MFCC parameters and first-order delta MFCC parameters from x.
%   mfc = MFCC(x) returns one row per frame with 24 columns: 12 liftered
%   cepstral coefficients followed by their 12 first-order differences.
%   Per the original notes: 24 mel filters, 256-point FFT, 8000 Hz sampling
%   frequency, 256 samples per frame. The frame shift is 80 samples.
%
%   Requires MELBANKM and ENFRAME (not defined in this listing) and HAMMING.

bank = melbankm(24, 256, 8000, 0, 0.5, 'm');
% Normalise the mel filter bank.
bank = full(bank);
bank = bank / max(bank(:));

% DCT coefficients, 12 x 24 (one row per cepstral index so that
% dctcoef * <24x1 log energies> yields 12 coefficients).
dctcoef = zeros(12, 24);
n = 0:23;
for k = 1:12
    dctcoef(k,:) = cos((2*n+1)*k*pi/(2*24));
end

% Normalised cepstral lifter window.
w = 1 + 6*sin(pi*(1:12)./12);
w = w / max(w);

% Pre-emphasis filter.
xx = double(x);
xx = filter([1 -0.9375], 1, xx);

% Split the signal into frames.
xx = enframe(xx, 256, 80);

% MFCC of every frame.
win = hamming(256);
m = zeros(size(xx,1), 12);
for i = 1:size(xx,1)
    y = xx(i,:);
    s = y' .* win;
    t = abs(fft(s));
    t = t.^2;                               % power spectrum
    c1 = dctcoef * log(bank * t(1:129));    % DCT of the log mel energies
    c2 = c1 .* w';                          % apply the lifter window
    m(i,:) = c2';
end

% First-order difference coefficients over a +/-2 frame window.
dtm = zeros(size(m));
for i = 3:size(m,1)-2
    dtm(i,:) = -2*m(i-2,:) - m(i-1,:) + m(i+1,:) + 2*m(i+2,:);
end
dtm = dtm / 3;

% Concatenate MFCC and delta-MFCC parameters.
mfc = [m dtm];

% Drop the first and last two frames, whose deltas are zero.
mfc = mfc(3:size(m,1)-2,:);
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)