1.完整项目描述和程序获取
>面包多安全交易平台:https://mbd.pub/o/bread/ZJeUlphp
>如果链接失效,可以直接打开本站店铺搜索相关店铺:
>如果链接失效,程序调试报错或者项目合作也可以加微信或者QQ联系。
2.部分仿真图预览
3.算法概述
在语音识别(Speech Recognition)和话者识别(Speaker Recognition)方面,最常用到的语音特征就是梅尔倒谱系数(Mel-scale Frequency Cepstral Coefficients,简称MFCC)。根据人耳听觉机理的研究发现,人耳对不同频率的声波有不同的听觉敏感度。从200Hz到5000Hz的语音信号对语音的清晰度影响最大。两个响度不等的声音作用于人耳时,响度较高的频率成分的存在会影响到对响度较低的频率成分的感受,使其变得不易察觉,这种现象称为掩蔽效应。由于频率较低的声音在内耳蜗基底膜上行波传递的距离大于频率较高的声音,故一般来说,低音容易掩蔽高音,而高音掩蔽低音较困难。在低频处的声音掩蔽的临界带宽较高频要小。所以,人们从低频到高频这一段频带内按临界带宽的大小由密到疏安排一组带通滤波器,对输入信号进行滤波。将每个带通滤波器输出的信号能量作为信号的基本特征,对此特征经过进一步处理后就可以作为语音的输入特征。这种特征不依赖于信号的性质,对输入信号不做任何的假设和限制,又利用了听觉模型的研究成果。因此,这种参数与基于声道模型的LPCC相比具有更好的鲁棒性,更符合人耳的听觉特性,而且当信噪比降低时仍然具有较好的识别性能。
4.部分源码
.....................................................................
%Input File
% Paths of the recordings to process, stored as the rows of a character
% matrix (all three paths have the same length, so each row is one path).
FileName = char('data\man1.mp3', 'data\man2.mp3', 'data\man3.mp3');
file_length = size(FileName);   % [number of rows, characters per row]
.....................................................................
% Split the signal y1 into frames, one frame per column of y1_frame.
% Row i of frame c is sample (c-1)*Fstep + i of y1, for i = 1..Fsample.
% Samples whose index would exceed L are not copied, so the last frame can
% be only partly written.
% temp_var (last sample index visited) and temp_var2 (current frame number)
% must already be initialised before this loop - presumably both to 1;
% confirm in the code above this excerpt.
while (temp_var <= L)
    for i = 1:Fsample
        temp_var = (temp_var2-1)*Fstep + i;   % absolute sample index for row i of this frame
        if temp_var <= L
            y1_frame(i,temp_var2) = y1(temp_var,1);
        end
    end
    temp_var2 = temp_var2 + 1;
end
% Number of frames = number of columns of y1_frame. length() returns the
% larger of the two dimensions, which over-counts the frames whenever there
% are fewer frames than samples per frame and makes later per-frame loops
% index past the last column.
l1 = size(y1_frame, 2);
clear temp_var temp_var2 ;
%FFT
% Spectrum of every frame; fft works down each column of y1_frame.
Nfft = pow2(nextpow2(Fsample));             % FFT length: smallest power of two >= Fsample
Y = fft(y1_frame, Nfft) / (Fsample/2);      % spectrum scaled by half of Fsample
f = (Fs/2) * linspace(0, 1, Nfft/2 + 1);    % Nfft/2+1 evenly spaced frequencies from 0 to Fs/2
Y_periodogram = (conj(Y) .* Y) / Nfft;      % squared magnitude of each bin divided by Nfft
%Mel-spaced Filter Banks
% Place 28 filter edge points evenly on the mel scale between 300 Hz and
% Fs/2, convert them back to Hz and then to FFT bin numbers.
hz2mel = @(hz) 1125*log(1 + hz/700);        % Hz  -> mel
mel2hz = @(mel) 700*(exp(mel/1125) - 1);    % mel -> Hz
m1 = hz2mel(300);                           % lowest edge, in mels
m2 = hz2mel(Fs/2);                          % highest edge, in mels
m = linspace(m1, m2, 28);                   % evenly spaced edges on the mel axis
f1 = mel2hz(m);                             % the same edges in Hz
b = floor((Nfft+1)*f1/Fs);                  % the same edges as FFT bin numbers
clear m1 m2 f1 hz2mel mel2hz
max_len = max(b);                           % largest bin number used by any filter
%Filter Bank Design
% Build the triangular filter bank H: one row per index k = 1..max(b), one
% column per filter. Filter m-1 rises linearly from 0 at edge b(m-1) to 1 at
% b(m) and falls back to 0 at b(m+1); all other entries are 0.
% NOTE(review): b is computed as floor((Nfft+1)*f/Fs) while row k of H is
% later multiplied with row k of the periodogram - confirm whether a
% one-bin offset between the two indexings is intended.
clear H m
H = zeros(max(b), length(b)-2);   % preallocate instead of growing inside the loops
for m = 2:length(b)-1
    for k = 1:size(H,1)
        if k == b(m)
            % Peak of the triangle. Assigned explicitly so that two edges
            % landing on the same bin (b(m) == b(m-1)) give 1 rather than
            % the 0/0 = NaN the slope formula would produce.
            H(k,m-1) = 1;
        elseif k >= b(m-1) && k < b(m)
            H(k,m-1) = (k-b(m-1))/(b(m)-b(m-1));     % rising edge
        elseif k > b(m) && k <= b(m+1)
            H(k,m-1) = (b(m+1)-k)/(b(m+1)-b(m));     % falling edge
        end
    end
end
clear m k b
%Filter Bank Energies
% Weighted sum of the periodogram under each filter: H has one column per
% filter, so e_pf has one row per filter and one column per periodogram column.
e_pf = H.' * Y_periodogram(1:max_len, :);
%Cepstral Coefficient Calculation
log_coeff = log(e_pf);
% A filter that collected exactly zero energy (e.g. a digitally silent
% frame) gives log(0) = -Inf, which the DCT would spread into Inf/NaN
% features. Replace only those entries with the log of the smallest positive
% normalised double; every non-zero energy keeps its exact value.
log_coeff(e_pf == 0) = log(realmin);
cepstr_coeff = dct(log_coeff);           % DCT applied down each column
cepstr_coeff = cepstr_coeff(1:12, :);    % keep the first 12 coefficients of every column
% Copy the first l1 feature columns of cepstr_coeff into train_input and set
% row j of train_target to 1 for the same columns.
% j is defined outside this excerpt - presumably the index of the current
% recording; confirm against the enclosing loop.
% NOTE(review): the column offset (j-1)*l1 only lines up with the columns
% written on earlier passes if l1 is the same every time - confirm.
col_offset = (j-1)*l1;
for i = 1:l1
    train_input(:, col_offset+i) = cepstr_coeff(:, i);
    train_target(j, col_offset+i) = 1;
end
clear col_offset
end
clear file_length
% Train ten pattern-recognition networks on train_input/train_target and
% keep the run with the lowest validation error. Overwriting net/tr on every
% pass would keep whichever run happened to be last, however it performed.
hidden_layer_size = 32;
best_vperf = Inf;
figure;
for count = 1:10
    trial_net = patternnet(hidden_layer_size);
    % NOTE(review): the three ratios add up to 0.95 rather than 1 - confirm
    % the intended train/validation/test split.
    trial_net.divideParam.trainRatio = 0.75;
    trial_net.divideParam.valRatio = 0.1;
    trial_net.divideParam.testRatio = 0.1;
    [trial_net, trial_tr] = train(trial_net, train_input, train_target);
    plotperform(trial_tr)
    % The first run is always accepted so that net/tr exist even if the
    % validation performance is NaN.
    if count == 1 || trial_tr.best_vperf < best_vperf
        net = trial_net;
        tr = trial_tr;
        best_vperf = trial_tr.best_vperf;
    end
end
clear trial_net trial_tr best_vperf
view(net)
save nets.mat net tr
A915