This project aims to successfully synthesise vowels A,E,I,O and U. The synthesis is to be accomplished in MATLAB using all-pole model of uniform acoustic tube approximation of the vocal tract. The all-pole model of the vocal tract is estimated by following these steps:
spline function to the provided vocal tract data,The input to the system consists of Rosenberg pulses, which emulates the volume velocity produced by the glottis.

A short melody has been generated using the vocal tract, and it can be heard here.
In general, the sound produced varied greatly. For instance, the "Male O" sounds extremely realistic, while "Male A" does not. Another important observation must be made about the last "fictitious" tube added at the end of the vocal tract. It can be intuitively imagined that it serves purpose to simulate the lips - how they are shaped and/or protruded during voicing. In particular, an interesting effect has bee observed with the "Child U" vowel, where when the last tube was made to be small in cross-sectional area, the vowel sounded more natural (click here) rather than when tube was left to be large (click here). Perhaps, it has to do with the fact that vowel U make the lips be more rounded and protruded, therefore by making the last fictitious tube smaller in cross-sectional area we simulate that effect.
An interesting observation is that it can be easily predicted how recongizeable the vowel will be by looking at the vocal tract and its acoustic-tube approximation. The more "coarse" and uncorrelated to the true shape the tube-model is, the less recognizeable is the vowel. The tube-model that most corresponds to the vocal tract shape is the "Male O" (here), which sounds the most natural.
%% SETUP
A=[5 5 5 5 6.5 8 8 8 8 8 8 8 8 6.5 5 4 3.2 1.6 2.6 2.6 2 1.6 1.3 1 0.65 0.65 0.65 1 1.6 2.6 4 1 1.3 1.6 2.6];
E=[8 8 5 5 4 2.6 2 2.6 2.6 3.2 4 4 4 5 5 6.5 8 6.5 8 10.5 10.5 10.5 10.5 10.5 8 8 6.5 6.5 6.5 6.5 1.3 1.6 2 2.6];
O=[4 4 3.2 1.6 1.3 1 0.65 0.65 0.65 0.65 0.65 0.65 0.65 1.3 2.6 4 6.5 8 8 10.5 10.5 10.5 10.5 10.5 10.5 10.5 10.5 10.5 8 8 2 2 2.6 3.2];
I=[3.2 3.2 3.2 3.2 6.5 13 13 16 13 10.5 10.5 8 8 6.5 6.5 5 5 4 3.2 2 1.6 2.6 1.3 0.65 0.65 1 1 1.3 1.6 2 3.2 4 5 5 1.3 1.3 1.6 2.6];
U=[0.65 0.65 0.32 0.32 2 5 10.5 13 13 13 13 10.5 8 6.5 5 3.2 2.6 2 2 2 1.6 1.3 2 1.6 1 1 1 1.3 1.6 3.2 5 8 8 10.5 10.5 10.5 2 2 2.6 2.6];
Fs=8e3;
c=350;
dx=0.5e-2;%2 cm
do_plotting=0;%1=>yes, otherwise=>no
%radiation constant alpha -- look in "R(z)" cell
alpha=0.5;
F0=200;%fundamental frequency
%% GENERATE TUBE MODEL
L=size(O,2)*dx;
x=dx:dx:L;
xx=dx:1e-5:L;
AA=spline(x,O,xx);
N=round((2*pi*Fs*L)/(pi*350));
%we want to "sample" the vocal tract in the center of the tobe length:
DX=L/N;
A_N=[];
nth_tube=1;
for n=1:size(xx,2)
if xx(1,n)>=(2*nth_tube-1)*DX/2 %sample here - middle of nth tube
A_N(1,nth_tube)=AA(n);
nth_tube=nth_tube+1;
end
end
AA_N=[];
nth_tube=1;
for n=1:size(xx,2)
if xx(n)<=DX*nth_tube
AA_N(1,n)=A_N(1,nth_tube);
else
nth_tube=nth_tube+1;
AA_N(1,n)=A_N(1,nth_tube);
end
end
%% ADD FICTITIOUS EXTRA TUBE OF LARGE AREA
A_N=[A_N 30];
%% CALCULATE REFLECTION COEFFICIENTS r
r=[];
for i=1:N %for 1:N tubes
r(1,i)=(A_N(1,i+1)-A_N(1,i))/(A_N(1,i+1)+A_N(1,i));%reflection coefficient - use formula
end
%% D(z) - REAL AND IDEALIZED
%real-world
D_z=[1];
for i=1:N
D_z=[D_z 0]+r(1,i)*[0 fliplr(D_z)];
end
Dideal_z=[1];
%here we make Nth reflection coefficient 1 , for idealized case
r_ideal=r;
r_ideal(1,N)=1;
for i=1:N
Dideal_z=[Dideal_z 0]+r_ideal(1,i)*[0 fliplr(Dideal_z)];
end
[H,F] = freqz(1,D_z,1024,Fs);
[Hideal,F] = freqz(1,Dideal_z,1024,Fs);
%% PLOT DATA FOR THE VOWEL MODEL
% figure(1);
% plot(xx,AA,'-r',xx,-AA,'-r'); hold on;
% plot(xx,AA_N,'-b',xx,-AA_N,'-b'); hold off;
% xlabel('Length of vocal tract, m');ylabel('Cross-sectional area A(x),m^2');title('Vocal tract area and tube approximation');
% print 'tube_model.jpg' -f1 -djpeg90 -r100;
% figure(2);
% stem(1:N,r,'-rs');axis([0 N+1 -1 1]);
% xlabel('Number of the tube');ylabel('Reflection coefficient');title('Reflection coefficients between tubes');
% print 'reflection_coefficients.jpg' -f2 -djpeg90 -r100;
% figure(3);
% plot(F,20*log10(abs(H)),F,20*log10(abs(Hideal)));
% xlabel('Frequency, Hz');ylabel('Magnitude of frequency response, dB');title('Ideal and realistic frequency response of tube model');
% print 'ideal_real_freqresp.jpg' -f3 -djpeg90 -r100;
%% X(z)=R(z)/D(z) - ADD RADIATION MODEL
R_z=[0 -alpha];
[H_X,F]=freqz(R_z,D_z,1024,Fs);
%% GENERATE ROSENBERG PULSE INPUT
u=rosenberg(F0,Fs);
U=fft(u,1024);
U_PSD=U.*conj(U);
input_plot=[u u u u u u];
% figure(5);
% subplot(2,1,1);
% plot((1/Fs)*[1:size(input_plot,2)],input_plot);
% xlabel('Time, sec');ylabel('Amplitude');title('Glottal waveform (Rosenberg pulse), six periods');
% subplot(2,1,2);
% plot((Fs/1024)*[1:512],U_PSD(1,[1:512]));
% xlabel('Frequency, Hz');ylabel('Power Spectrum Density');title('Power Spectrum Density of Rosenberg pulse');
% print 'rosenberg_pulse_x6.jpg' -f5 -djpeg90 -r100;
%% GENERATE 2 SECONDS OF OUTPUT SOUND
u_duration=size(u,2)*(1/Fs);
N_u=round(2/u_duration);%how many rosenberg pulses of set frequency fit in 2 seconds
input=[];
for n=1:N_u
input=[input u];
end
% output=filter(R_z,D_z,input);%output sound
% wavwrite(0.3*output,Fs,'output.wav');
%% MELODY
step_ratio=2^(1/12);
melody=[0 4 7 9 7 4 2 0];
input=[];
for i=1:size(melody,2)
u=rosenberg(F0*step_ratio^melody(1,i),Fs);
N_u=round(1/u_duration);%how many rosenberg pulses of set frequency fit in 1 second
for n=1:N_u
input=[input u];
end
end
output=filter(R_z,D_z,input);
wavwrite(0.3*output,Fs,'melody.wav');
function [A_z,Dideal_z,Dreal_z]=D_z(N)
A_z=zeros(1,1+N/2);
A_z(1,1+N/2)=1;%z^(-N/2)
%real-world
Dreal_z=[1];
for i=1:N
Dreal_z=[Dreal_z 0]+A_R_n(1,i)*[0 fliplr(Dreal_z)];
end
%ideal-case
Dideal_z=[1];
A_R_n(1,N)=1;%idealized radiation load
for i=1:N
Dideal_z=[Dideal_z 0]+A_R_n(1,i)*[0 fliplr(Dideal_z)];
end
function [AA,xx,A_N,N]=tube_model(A,dx,Fs,c)
L=size(A,2)*dx;
x=dx:dx:L;
xx=dx:1e-3:L;
AA=spline(x,A,xx);
N=round((2*pi*Fs*L)/(pi*350));
%we want to "sample" the vocal tract in the center of the tobe length:
DX=L/N;
A_N=[];
for i=1:N
for n=1:size(xx,2)
if xx(n)>=i*DX %sample here
A_N(1,i)=AA(n);
continue;
end
end
end
function R_t=rosenberg_pulse(N1,N2)
%generates the row vector of rosenberg pulse samples, N1 being the number
%of samples in the opening phase, and N2 being the number of samples in the
%closing phase of the pulse.
R_t=[];
for i=1:N1+N2
if i<=N1
R_t(1,i)=(1/2)*(1-cos(i*(pi/N1)));
elseif i>N1 && i<=N1+N2
R_t(1,i)=cos((pi*(i-N1))/(2*N2));
end
end
function u=rosenberg(F0,Fs)
T0=1/F0;
N_samples=floor(T0*Fs);
OPEN_PHASE=floor(0.60*N_samples);
RETURN_PHASE=floor(0.10*N_samples);
CLOSED_PHASE=N_samples-(OPEN_PHASE+RETURN_PHASE);
u=[zeros(1,CLOSED_PHASE) rosenberg_pulse(OPEN_PHASE,RETURN_PHASE)];