function feaMat=sdFeaExtract(au, opt, showPlot)
% sdFeaExtract: Feature extraction for SD (stress detection)
%
%	Usage:
%		feaMat=sdFeaExtract(au)
%		feaMat=sdFeaExtract(au, opt)
%		feaMat=sdFeaExtract(au, opt, showPlot)
%		feaName=sdFeaExtract('inputName')
%		opt=sdFeaExtract('defaultOpt')
%
%	Description:
%		feaMat=sdFeaExtract(au, opt, showPlot) returns one feature column for
%		each phone of au.asraOutput(2) whose isVowel flag is nonzero.
%			au: structure with field asraOutput, which must have exactly 3
%				elements. Each phone in au.asraOutput(2).phone must carry the
%				fields isVowel, interval, pitch and volume.
%			opt: option structure (currently not used by the computation);
%				when omitted or empty, the default options are used.
%			showPlot: nonzero to display the normalized matrix via imagesc
%				(default 0).
%			feaMat: (number of features) x (number of vowels) matrix. Each
%				row is normalized across the vowels of the utterance to zero
%				mean and unit standard deviation. A row whose standard
%				deviation is 0 (e.g., only one vowel, or a constant feature)
%				becomes NaN because of the 0/0 division.
%		sdFeaExtract('inputName') returns the cell array of feature names,
%		in the same order as the rows of feaMat.
%		sdFeaExtract('defaultOpt') returns the default option structure.
%		When called with no input argument, selfdemo is invoked.
%
%	Example:
%		% NOTE: the directory name in the original example was mis-encoded;
%		% replace the path below with the location of your own wave file.
%		auFile='E:\dataSet\Merriam_Webster-2010/tomorrow.wav';
%		text='tomorrow';
%		language='english';
%		asraOutput=waveAssess(auFile, text, language, 1, 'temp.pv');	% Forced alignment
%		au.asraOutput=asraOutput.cm.word
%		au.text=text;
%		au.sylNum=3;
%		au.stressPos=2;
%		au.asraOutput(2).phone(1).isVowel=0;
%		au.asraOutput(2).phone(2).isVowel=1;
%		au.asraOutput(2).phone(3).isVowel=0;
%		au.asraOutput(2).phone(4).isVowel=1;
%		au.asraOutput(2).phone(5).isVowel=0;
%		au.asraOutput(2).phone(6).isVowel=1;
%		fea=sdFeaExtract(au, [], 1);

%	Category: Audio feature extraction for stress detection
%	Roger Jang, 20150603

if nargin<1, selfdemo; return; end
if ischar(au) && strcmpi(au, 'inputName')		% Return input names
	feaMat={'duration', 'pitchMin', 'pitchMean', 'pitchMax', 'pitchRange', 'volMin', 'volMean', 'volMax', 'volRange', 'pitchCoef0', 'pitchCoef1', 'pitchCoef2', 'pitchCoef3', 'pitchCoef4', 'volCoef0', 'volCoef1', 'volCoef2', 'volCoef3', 'volCoef4'};
	return
end
if ischar(au) && strcmpi(au, 'defaultOpt')		% Set default options
	feaMat.dim=8;		% Dummy field to be added later
	return
end
if nargin<2 || isempty(opt), opt=feval(mfilename, 'defaultOpt'); end %#ok<NASGU> % opt is reserved; nothing below reads it yet
if nargin<3, showPlot=0; end

% The code below always reads the phones of the 2nd element of asraOutput,
% so the expected layout of exactly 3 elements is enforced here.
sylNum=length(au.asraOutput);
if sylNum~=3, error('SylNum is not equal to 3!'); end

vowelIndex=find([au.asraOutput(2).phone.isVowel]);
vowelNum=length(vowelIndex);
fitOrder=4;
% 1 duration + 4 pitch statistics + 4 volume statistics
% + (fitOrder+1) coefficients for each of the pitch and volume fits (=19 when fitOrder is 4)
feaDim=9+2*(fitOrder+1);
vowelFea=zeros(feaDim, 1);
feaMat=zeros(feaDim, vowelNum);
for i=1:vowelNum
	phone=au.asraOutput(2).phone(vowelIndex(i));
	pitchMin=min(phone.pitch);
	pitchMax=max(phone.pitch);
	volMin=min(phone.volume);
	volMax=max(phone.volume);
	k=0;
	k=k+1; vowelFea(k)=diff(phone.interval);	% duration
	k=k+1; vowelFea(k)=pitchMin;			% pitchMin
	k=k+1; vowelFea(k)=mean(phone.pitch);		% pitchMean
	k=k+1; vowelFea(k)=pitchMax;			% pitchMax
	k=k+1; vowelFea(k)=pitchMax-pitchMin;		% pitchRange
	k=k+1; vowelFea(k)=volMin;			% volMin
	k=k+1; vowelFea(k)=mean(phone.volume);		% volMean
	k=k+1; vowelFea(k)=volMax;			% volMax
	k=k+1; vowelFea(k)=volMax-volMin;		% volRange
	pitchCoef=legendrePolyCoef(phone.pitch, fitOrder);
	k=k+1; vowelFea(k:k+fitOrder)=pitchCoef(:);	% Legendre polynomial fitting of pitch
	volCoef=legendrePolyCoef(phone.volume, fitOrder);
	k=k+fitOrder+1; vowelFea(k:k+fitOrder)=volCoef(:);	% Legendre polynomial fitting of volume
	% Store the feature vector of this vowel as the i-th column
	feaMat(:,i)=vowelFea;
end
% Test all elements at once: any() on a matrix works column by column, and
% "if" on the resulting vector is true only when EVERY column has a NaN.
if any(isnan(feaMat(:)))
	warning('sdFeaExtract:nanFeature', 'NaN found in the extracted features before normalization.');
end
% Normalize fea with the utterance (z-score of each feature across the vowels)
feaMean=mean(feaMat, 2);
feaStd=std(feaMat, [], 2);
feaMat=bsxfun(@minus, feaMat, feaMean);
feaMat=bsxfun(@rdivide, feaMat, feaStd);	% Rows with std=0 become NaN (0/0)

if showPlot
	imagesc(feaMat);
end

% ====== Legendre polynomial fitting
function coef=legendrePolyCoef(vec, fitOrder)
len=length(vec);
if len