function [fea, errorStatus, meanVolume, meanClarity, medianPitch]=wave2fea(au, sidOpt, showInfo)
%wave2fea: Audio file to feature
%
%	Usage:
%		[fea, errorStatus, meanVolume, meanClarity, medianPitch]=wave2fea(au, sidOpt, showInfo);
%
%	Description:
%		Inputs:
%			au: audio structure; the fields signal, fs and nbits are read here,
%				and au.file is read only when sidOpt.useGtEpd is set.
%			sidOpt: option structure (default: sidOptSet). Fields read here:
%				useGtEpd, epdFcn, useWaveformNormalization, feaType
%				('mfcc', 'spectrum', 'volume' or 'pitch'), useIntFea,
%				useEnergy, temporalNormMode (1: CMS, 2: CN).
%			showInfo: forwarded to the EPD function; when nonzero the final
%				feature matrix is also plotted with mesh (default: 0).
%		Outputs:
%			fea: feature matrix ([] when EPD fails or feaType matches no case).
%			errorStatus: 0 on success, 1 when end-point detection failed.
%			meanVolume: volume averaged over the frames flagged by the EPD
%				function; NaN when sidOpt.useGtEpd is set (no EPD output then).
%			meanClarity: mean clarity over frames with nonzero pitch.
%			medianPitch: median pitch over frames with nonzero pitch.
%
%	Example:
%		auFile='¤£¦Y¸²µå­Ë¦R¸²µå¥Ö#019_12672_44585.wav';
%		au=myAudioRead(auFile);
%		sidOpt=sidOptSet;
%		[fea, errorStatus, meanVolume, meanClarity, medianPitch]=wave2fea(au, sidOpt, 1);

% NOTE: the "Example" section above is executed by selfdemo (via mFileParse),
% so its text must be kept exactly as is.

if nargin<1, selfdemo; return; end
if nargin<2, sidOpt=sidOptSet; end
if nargin<3, showInfo=0; end

y=au.signal; fs=au.fs; nbits=au.nbits;
fea=[]; errorStatus=0; meanVolume=nan; meanClarity=nan; medianPitch=nan;

% ====== PT parameters (specified first since EPD needs the same frameSize and overlap)
pfType=2;
ptOpt=ptOptSet(fs, nbits, pfType);
ptOpt.volThresholding=0;
% ====== EPD parameters
epdParam=epdPrmSet(fs);
epdParam.frameSize=ptOpt.frameSize;	% To match that of ptOpt
epdParam.overlap=ptOpt.overlap;

% ====== End-point detection
zeroOneVec=[]; volume=[];	% Only filled in by the EPD function
if sidOpt.useGtEpd
	% Ground-truth end points (labeled by human) are the last two
	% '_'-separated fields of the file's main name, e.g. xxx_12672_44585.wav
	[~, mainName]=fileparts(au.file);
	items=split(mainName, '_');
	epInSampleIndex=[];
	if numel(items)>=2
		% str2double instead of eval: never execute text taken from a file name
		epInSampleIndex=[str2double(items{end-1}), str2double(items{end})];
		if any(isnan(epInSampleIndex)), epInSampleIndex=[]; end	% Unparsable ==> treat as EPD failure
	end
else
	[epInSampleIndex, ~, ~, zeroOneVec, volume]=feval(sidOpt.epdFcn, au, epdParam, showInfo);
end
if isempty(epInSampleIndex)
	fprintf('%s\n', 'Epd failure!');
	errorStatus=1;	% Record error due to failed EPD
	return;
end

% ====== Mean volume (needs the EPD function's outputs, which do not exist in GT-EPD mode)
if ~isempty(zeroOneVec) && ~isempty(volume)
	meanVolume=dot(zeroOneVec, volume)/sum(zeroOneVec);
end

% ====== Median pitch and mean clarity over frames with nonzero pitch (computed on the whole recording)
pfType=1;	% Original note: 0 for AMDF, 1 for ACF (TODO: confirm against ptOptSet; the 'pitch' case below labels pfType=2 as acf)
ptOpt=ptOptSet(au.fs, au.nbits, pfType);
ptOpt.mainFun='maxPickingOverPf';
[pitch, clarity]=pitchTrack(au, ptOpt);
voiced=(pitch~=0);
medianPitch=median(pitch(voiced));
meanClarity=mean(clarity(voiced));

% ====== Keep only the detected segment (silence is not used)
y=y(epInSampleIndex(1):epInSampleIndex(2));
if sidOpt.useWaveformNormalization
	peak=max(abs(y));
	if peak>0	% Skip an all-zero segment to avoid 0/0 ==> NaN
		y=y/peak*2^nbits/2;	% Scale the peak to half of the 2^nbits range
	end
end

% ====== Feature extraction
switch(sidOpt.feaType)
	case 'mfcc'
		if sidOpt.useIntFea==1	% Defined in paramSet.m
			fea=wave2mfccInt(y, fs, nbits);
		else
			fea=wave2mfcc(y, fs);
		end
		if ~sidOpt.useEnergy, fea=fea(2:end, :); end	% Drop the first row when energy is not used
	case 'spectrum'
		frameMat=buffer2(y, 320, 160);
		frameCount=size(frameMat, 2);
		spec=cell(1, frameCount);
		for k=1:frameCount
			spec{k}=fftOneSide(frameMat(:,k), fs);
		end
		fea=[spec{:}];	% One column per frame
		if ~sidOpt.useEnergy, fea=fea(2:end, :); end
	case 'volume'
		frameMat=buffer2(y, 320, 160);
		frameMean=mean(frameMat);
		frameMat=frameMat-ones(320,1)*frameMean;	% Remove the mean of each frame
		fea=sum(abs(frameMat));	% Sum of absolute values of each frame
	case 'pitch'
		pfType=2;
		ptOpt=ptOptSet(fs, nbits, pfType);	% acf
		ptOpt.frameSize=512;
		ptOpt.overlap=512-160;
		[fea, ~]=ptByPfMex(y, fs, nbits, ptOpt);	% Still request two outputs, as the original call did
end

% ====== Temporal normalization
switch (sidOpt.temporalNormMode)
	case 1	% CMS
		frameNum=size(fea, 2);
		cepsMean=mean(fea, 2);
		if sidOpt.useIntFea==1
			cepsMean=round(cepsMean);	% Round the mean when integer features are used
		end
		fea=fea-cepsMean*ones(1, frameNum);
	case 2	% CN
		fea=inputNormalize(fea);
end

if showInfo
	figure; mesh(fea);
end

% ====== Self demo
function selfdemo
% Run the "Example" section of this file's help text
mObj=mFileParse(which(mfilename));
strEval(mObj.example);