求助语音识别代码的注释,要每一句都写明意思,谢谢

求助语音识别代码的注释,要每一句都写明意思,谢谢,第1张

这个是完整的代码,我自己的账号发不了这么长,希望好心人帮忙注释啊,非常感谢!!!

(1)端点检测部分(vad):

function [x1,x2] = vad(x)
%VAD  Two-threshold speech endpoint detection.
%   [x1,x2] = vad(x) scans the signal x frame by frame and returns the
%   frame index x1 at which speech is judged to start and the frame index
%   x2 at which it is judged to end. The decision uses a short-time
%   magnitude sum ("energy") and a thresholded zero-crossing count.
%
%   Notes:
%     * x1 stays 0 if no frame ever exceeds the high energy threshold.
%     * x2 can be fractional because half of the trailing silence count is
%       subtracted from the speech length.
%     * The function draws the waveform, the energy curve and the
%       zero-crossing curve in three subplots, with the detected endpoints
%       marked by red vertical lines.
%     * Requires enframe, which is not defined in this file.

% Normalise the amplitude of x to [-1,1].
x = double(x);
x = x / max(abs(x));

% Constants.
FrameLen   = 240;   % frame length in samples
FrameInc   = 80;    % frame hop in samples
amp1       = 10;    % high short-time energy threshold
amp2       = 2;     % low short-time energy threshold
zcr1       = 10;    % high zero-crossing threshold (not referenced below)
zcr2       = 5;     % low zero-crossing threshold
maxsilence = 3;     % max silent frames inside speech (author: 3*10ms = 30ms)
minlen     = 15;    % min speech length in frames (author: 15*10ms = 150ms)
status     = 0;     % 0 = silence, 1 = possible start, 2 = speech, 3 = ended
count      = 0;     % accumulated speech frames
silence    = 0;     % accumulated silent frames

% Zero-crossing count per frame. tmp1/tmp2 hold each sample and its
% successor, framed identically.
tmp1  = enframe(x(1:end-1), FrameLen, FrameInc);
tmp2  = enframe(x(2:end),   FrameLen, FrameInc);
signs = (tmp1.*tmp2) < 0;       % 1 where adjacent samples differ in sign
% NOTE(review): the difference is signed, so only falling steps larger than
% the empirical 0.02 threshold are counted - confirm whether abs() was intended.
diffs = (tmp1 - tmp2) > 0.02;
zcr   = sum(signs.*diffs, 2);

% Short-time energy: per-frame sum of magnitudes of the pre-emphasised signal.
amp = sum(abs(enframe(filter([1 -0.9375], 1, x), FrameLen, FrameInc)), 2);

% Lower the energy thresholds for quiet recordings.
amp1 = min(amp1, max(amp)/4);
amp2 = min(amp2, max(amp)/8);

% Endpoint detection state machine.
x1 = 0;
x2 = 0;
for n = 1:length(zcr)
    switch status
        case {0,1}                      % silence or possible start
            if amp(n) > amp1            % definitely inside speech
                x1 = max(n-count-1, 1);
                status  = 2;
                silence = 0;
                count   = count + 1;
            elseif amp(n) > amp2 || zcr(n) > zcr2   % possibly speech
                status = 1;
                count  = count + 1;
            else                        % silence
                status = 0;
                count  = 0;
            end
        case 2                          % speech segment
            if amp(n) > amp2 || zcr(n) > zcr2       % still in speech
                count = count + 1;
            else                        % speech may be ending
                silence = silence + 1;
                if silence < maxsilence     % silence not long enough yet
                    count = count + 1;
                elseif count < minlen       % too short: treat as noise
                    status  = 0;
                    silence = 0;
                    count   = 0;
                else                        % speech has ended
                    status = 3;
                end
            end
        case 3
            break;                      % leaves the for loop
    end
end

% Drop half of the trailing silence from the speech length.
count = count - silence/2;
x2 = x1 + count - 1;

% Waveform with endpoints (frame index converted to sample index).
subplot(311)
plot(x)
axis([1 length(x) -1 1])
xlabel('语音信号')
line([x1*FrameInc x1*FrameInc], [-1,1], 'Color', 'red')
line([x2*FrameInc x2*FrameInc], [-1,1], 'Color', 'red')

% Short-time energy with endpoints.
subplot(312)
plot(amp)
axis([1 length(amp) 0 max(amp)])
xlabel('短时能量')
line([x1,x1], [min(amp),max(amp)], 'Color', 'red')
line([x2,x2], [min(amp),max(amp)], 'Color', 'red')

% Zero-crossing count with endpoints.
subplot(313)
plot(zcr)
axis([1 length(zcr) 0 max(zcr)])
xlabel('过零率')
line([x1,x1], [min(zcr),max(zcr)], 'Color', 'red')
line([x2,x2], [min(zcr),max(zcr)], 'Color', 'red')

(2)MFCC部分:

function ccc = mfcc(x)
%MFCC  MFCC plus first-order delta features for a speech signal.
%   ccc = mfcc(x) pre-emphasises x, splits it into frames of 256 samples
%   with a hop of 80, and computes 12 liftered cepstral coefficients per
%   frame from a 24-channel mel filter bank. First-order difference
%   coefficients are appended, so each row of ccc has 24 values. The first
%   two and last two frames are dropped because their deltas are zero.
%
%   Requires melbankm and enframe (not defined in this file) and hamming.

% Mel filter bank, normalised to a peak of 1
% (24 filters, FFT length 256, sampling rate 8000 per the original author).
bank = melbankm(24,256,8000,0,0.5,'m');
bank = full(bank);
bank = bank/max(bank(:));

% DCT coefficients, 12 x 24.
dctcoef = zeros(12,24);
n = 0:23;
for k = 1:12
    dctcoef(k,:) = cos((2*n+1)*k*pi/(2*24));
end

% Cepstral lifter window, normalised to a peak of 1.
w = 1 + 6 * sin(pi * (1:12) ./ 12);
w = w/max(w);

% Pre-emphasis. The coefficient vector needs the space: [1-0.9375] is the
% scalar 0.0625 and would only scale the signal.
xx = double(x);
xx = filter([1 -0.9375],1,xx);

% Framing: frame length 256, hop 80.
xx = enframe(xx,256,80);

% Per-frame MFCC.
m = zeros(size(xx,1),12);
for i = 1:size(xx,1)
    y = xx(i,:);
    s = y' .* hamming(256);             % Hamming window
    t = abs(fft(s));                    % magnitude spectrum
    t = t.^2;                           % power spectrum
    c1 = dctcoef * log(bank * t(1:129));    % DCT of log mel energies
    c2 = c1.*w';                        % apply the lifter
    m(i,:) = c2';
end

% First-order difference coefficients.
dtm = zeros(size(m));
for i = 3:size(m,1)-2
    dtm(i,:) = -2*m(i-2,:) - m(i-1,:) + m(i+1,:) + 2*m(i+2,:);
end
dtm = dtm / 3;

% Concatenate the static and delta coefficients.
ccc = [m dtm];

% Drop the first and last two frames, whose deltas are zero.
ccc = ccc(3:size(m,1)-2,:);

(3)dtw计算部分:

function dist = dtw2(test, ref)
%DTW2  Band-constrained dynamic time warping distance.
%   dist = dtw2(test, ref) matches the rows (frames) of test against the
%   rows of ref and returns the accumulated squared-Euclidean cost at the
%   last row of ref. realmax is returned when the two lengths differ too
%   much to be matched.
%
%   The search band for each test frame x is [y_min, y_max]. It is handed
%   to the helper warp through global variables, and warp updates the
%   global cost column D one test frame at a time.

global x y_min y_max
global t r
global D d
global m n

t = test;
r = ref;
n = size(t,1);          % number of test frames
m = size(r,1);          % number of reference frames

d = zeros(m,1);
D = ones(m,1) * realmax;
D(1) = 0;

% Give up if the two template lengths differ too much.
if (2*m-n<3) || (2*n-m<2)
    dist = realmax;
    return
end

% Break points of the search band.
xa = round((2*m-n)/3);
xb = round((2*n-m)*2/3);

if xb >= xa
    % Regions 1:xa, xa+1:xb, xb+1:n. When xa == xb the middle region is
    % empty, which reproduces the original two-region case.
    for x = 1:xa
        y_max = 2*x;
        y_min = round(0.5*x);
        warp;
    end
    for x = (xa+1):xb
        y_max = round(0.5*(x-n)+m);
        y_min = round(0.5*x);
        warp;
    end
    for x = (xb+1):n
        y_max = round(0.5*(x-n)+m);
        y_min = round(2*(x-n)+m);
        warp;
    end
else
    % xa > xb: regions 1:xb, xb+1:xa, xa+1:n.
    for x = 1:xb
        y_max = 2*x;
        y_min = round(0.5*x);
        warp;
    end
    for x = (xb+1):xa
        y_max = 2*x;
        y_min = round(2*(x-n)+m);
        warp;
    end
    for x = (xa+1):n
        y_max = round(0.5*(x-n)+m);
        y_min = round(2*(x-n)+m);
        warp;
    end
end

% Matching score.
dist = D(m);

function warp
%WARP  Advance the DTW cost column by one test frame.
%   Reads the current test frame index x, the row band [y_min, y_max], the
%   test/reference matrices t and r, and the cost column D from global
%   variables. For every row y in the band it adds the squared Euclidean
%   distance between t(x,:) and r(y,:) to the smallest of D(y), D(y-1) and
%   D(y-2), then stores the result back into D.
%
%   Rows outside the band keep the value they had before the call.

global x y_min y_max
global t r
global D d

d = D;
for y = y_min:y_max
    % Predecessors are read from D (the previous column), never from d.
    D1 = D(y);
    if y > 1
        D2 = D(y-1);
    else
        D2 = realmax;
    end
    if y > 2
        D3 = D(y-2);
    else
        D3 = realmax;
    end
    d(y) = sum((t(x,:)-r(y,:)).^2) + min([D1,D2,D3]);
end
D = d;

(4)测试函数testdtw部分:

% testdtw: isolated-word recognition test using vad, mfcc and dtw2.
% Builds MFCC templates for ten reference recordings (0a.wav .. 9a.wav) and
% ten test recordings (0b.wav .. 9b.wav), computes all pairwise DTW
% distances, and prints the closest reference for each test recording.

% wavread is missing from current MATLAB releases; use audioread when present.
if exist('audioread','file')
    readAudio = @audioread;
else
    readAudio = @wavread;
end

disp('正在计算参考模板的参数...')
for i = 1:10
    fname = sprintf('G:\\石东东\\语音\\%da.wav',i-1);
    x = readAudio(fname);
    [x1, x2] = vad(x);
    m = mfcc(x);
    % mfcc drops two leading frames, hence the -2 offset. Clamp the range
    % so an early start or a late end cannot index outside m.
    m = m(max(x1-2,1):min(x2-2,size(m,1)),:);
    ref(i).mfcc = m;
end

disp('正在计算测试模板的参数...')
for i = 1:10
    fname = sprintf('G:\\石东东\\语音\\%db.wav',i-1);
    x = readAudio(fname);
    [x1, x2] = vad(x);
    m = mfcc(x);
    m = m(max(x1-2,1):min(x2-2,size(m,1)),:);
    test(i).mfcc = m;
end

disp('正在进行模板匹配...')
dist = zeros(10,10);
for i = 1:10
    for j = 1:10
        dist(i,j) = dtw2(test(i).mfcc, ref(j).mfcc);
    end
end

disp('正在计算匹配结果...')
for i = 1:10
    [d,j] = min(dist(i,:));     % d is the smallest distance, j its reference index
    fprintf('测试模板%d的识别结果为:%d\n', i-1, j-1);
end

%mfcc

function mfc=mfcc(x)
%MFCC  MFCC plus first-order delta features for a speech sequence.
%   mfc = mfcc(x) extracts MFCC parameters from x and returns them together
%   with their first-order differences. Per the original author: the mel
%   filter bank has 24 channels, the FFT length is 256, the sampling rate
%   is 8000 Hz, and x is framed in blocks of 256 samples (hop 80).
%   Each row of mfc holds 12 static and 12 delta coefficients. The first
%   two and last two frames are dropped.
%
%   Requires melbankm and enframe (not defined in this file) and hamming.

bank=melbankm(24,256,8000,0,0.5,'m');

% Normalise the mel filter bank to a peak of 1.
bank=full(bank);
bank=bank/max(bank(:));

% DCT coefficients, 12 x 24: one row per cepstral index, so that
% dctcoef*(24x1 log energies) yields 12 coefficients.
dctcoef=zeros(12,24);
n=0:23;
for k=1:12
    dctcoef(k,:)=cos((2*n+1)*k*pi/(2*24));
end

% Cepstral lifter window, normalised to a peak of 1.
w=1+6*sin(pi*(1:12)./12);
w=w/max(w);

% Pre-emphasis filter.
xx=double(x);
xx=filter([1 -0.9375],1,xx);

% Framing.
xx=enframe(xx,256,80);

% Per-frame MFCC.
m=zeros(size(xx,1),12);
for i=1:size(xx,1)
    y=xx(i,:);
    s=y'.*hamming(256);
    t=abs(fft(s));
    t=t.^2;                             % power spectrum
    c1=dctcoef*log(bank*t(1:129));      % DCT of the log mel energies
    c2=c1.*w';                          % apply the lifter window
    m(i,:)=c2';
end

% First-order difference coefficients.
dtm=zeros(size(m));
for i=3:size(m,1)-2
    dtm(i,:)=-2*m(i-2,:)-m(i-1,:)+m(i+1,:)+2*m(i+2,:);
end
dtm=dtm/3;

% Concatenate the static and delta coefficients.
mfc=[m dtm];

% Drop the first and last two frames, whose deltas are zero.
mfc=mfc(3:size(m,1)-2,:);


欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/yw/11297186.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2023-05-15
下一篇 2023-05-15

发表评论

登录后才能评论

评论列表(0条)

保存