function au2=hummingFeaExtract(au, vsdOpt, showPlot)
% hummingFeaExtract: Wave to feature conversion for s/u/v detection
%
%	Usage:
%		au2=hummingFeaExtract(au, vsdOpt, showPlot)
%
%	Description:
%		au2=hummingFeaExtract(au, vsdOpt, showPlot) returns a copy of au with frame-based features and frame labels attached.
%			au: audio file name (string), or an audio struct with fields "file", "signal" and "fs"
%			vsdOpt: options with fields "frameSize", "overlap", "usedChannel", "frameZeroMeanOrder" and "outputName" (default: vsdOptSet)
%			showPlot: 1 for plotting the first two features, raw and normalized (default: 0)
%			au2.feature: one row per feature, one column per frame (row names are in au2.other.inputName)
%			au2.tOutput: row vector of frame labels, 1 for target pitch<=0 and 2 for target pitch>0
%			au2.other: struct with fields "annotation", "inputName", "frameTime" and "tPitch"
%		The human-labeled pitch is read from the file with the same name as au.file except that the last three characters are replaced by "pv".
%		If that file cannot be found, a warning is issued and au2.feature, au2.tOutput and au2.other are all empty.
%
%	Example:
%		auFile = 'test.wav';
%		auFile='d:/dataSet/childSong/waveFile/2007-音訊處理與辨識/19461108任佳王民/十個印第安人_不詳_0.wav';
%		vsdOpt=vsdOptSet;
%		showPlot=1;
%		au2=hummingFeaExtract(auFile, vsdOpt, showPlot);

%	Roger Jang, 20040910, 20070417, 20130201, 20200112

% NOTE: the "Example" section above is parsed and evaluated by selfdemo, so its statements must stay valid MATLAB code.

if nargin<1, selfdemo; return; end
if nargin<2, vsdOpt=vsdOptSet; end
if nargin<3, showPlot=0; end

%% Read the given audio file
if ischar(au), au=myAudioRead(au); end	% au is actually the wave file name
%au=waveFormatConvert(au, 8000, 8, 1);	% Format conversion to mono, 8KHz, 8bits

%% Locate the pitch file that holds the groundtruth
pvFile=[au.file(1:end-3), 'pv'];	% Assumes a 3-character file extension such as "wav"
if ~exist(pvFile, 'file')	% Cannot find the pv file to specify the groundtruth
	warning('Cannot find the pv file %s!', pvFile);
	au2=au;
	au2.feature=[];
	au2.tOutput=[];
	au2.other=[];
	return
end

%% Frame blocking and feature extraction
frameSize=vsdOpt.frameSize;
overlap=vsdOpt.overlap;
frameMat=buffer2(au.signal(:, vsdOpt.usedChannel), frameSize, overlap);
frameNum=size(frameMat, 2);	% One column per frame
% ====== zero-mean for each frame
frameMat=frameZeroMean(frameMat, vsdOpt.frameZeroMeanOrder);
feature=[];
inputName={};
% ====== Feature: volume
vol=frame2volume(frameMat);
feature=[feature; vecRatio(vol)];
inputName{end+1}='vol';
% ====== Feature: zcr
zcr=frame2zcr(frameMat, 2);
feature=[feature; vecRatio(zcr)];
inputName{end+1}='zcr';
% ====== Feature: frame local max count
frameLocalMaxIndex=localMax(frameMat);
frameLocalMaxCount=sum(frameLocalMaxIndex);
feature=[feature; vecRatio(frameLocalMaxCount)];
inputName{end+1}='frameLocalMaxCount';
% ====== Feature: acf clarity
[acfClarity, acfMat]=frame2clarity(frameMat, au.fs, 'acf');
feature=[feature; vecRatio(acfClarity)];
inputName{end+1}='acfClarity';
% ====== Feature: local max count of acf
acfLocalMaxIndex=localMax(acfMat);
acfLocalMaxCount=sum(acfLocalMaxIndex);
feature=[feature; vecRatio(acfLocalMaxCount)];
inputName{end+1}='acfLocalMaxCount';
% ====== Feature: nsdf clarity
[nsdfClarity, nsdfMat]=frame2clarity(frameMat, au.fs, 'nsdf');
feature=[feature; vecRatio(nsdfClarity)];
inputName{end+1}='nsdfClarity';
% ====== Feature: local max count of nsdf
nsdfLocalMaxIndex=localMax(nsdfMat);
nsdfLocalMaxCount=sum(nsdfLocalMaxIndex);
feature=[feature; vecRatio(nsdfLocalMaxCount)];
inputName{end+1}='nsdfLocalMaxCount';
% ====== Feature: amdf clarity
[amdfClarity, amdfMat]=frame2clarity(frameMat, au.fs, 'amdf');
feature=[feature; vecRatio(amdfClarity)];
inputName{end+1}='amdfClarity';
% ====== Feature: local max count of amdf
amdfLocalMaxIndex=localMax(amdfMat);
amdfLocalMaxCount=sum(amdfLocalMaxIndex);
feature=[feature; vecRatio(amdfLocalMaxCount)];
inputName{end+1}='amdfLocalMaxCount';
% ====== Feature: hod
ashod=frame2ashod(frameMat, 4);
feature=[feature; vecRatio(ashod)];
inputName{end+1}='hod';
% ====== Feature: volume-weighted clarity
%vwClarity=volRatio.*clarityRatio;
%feature=[feature; vwClarity];
%inputName={inputName{:}, 'vwClarity'};

%% Add annotation (file name and frame index) for each frame
% Backslashes become slashes and underscores are escaped; the same string is reused as the plot title.
% The replacement is done once here since it does not depend on the frame index.
escapedFile=strrep(strrep(au.file, '\', '/'), '_', '\_');
annotation=cell(1, frameNum);	% cell(frameNum) would allocate a frameNum-by-frameNum array
for i=1:frameNum
	annotation{i}=sprintf('%s\n%s', escapedFile, int2str(i));
end
other.annotation=annotation;

%% Read human-labeled pitch file (its existence was verified above)
targetPitch=asciiRead(pvFile);
if length(targetPitch)>frameNum, targetPitch=targetPitch(1:frameNum); end	% Due to the difference between buffer.m (used before) and buffer2.m (used now)
% NOTE(review): if the pv file has fewer entries than frameNum, frameClass is shorter than the number of feature columns -- confirm that downstream code tolerates this.
frameClass=targetPitch>0;
frameClass=frameClass+1;	% {0,1} ===> {1,2}
frameClass=frameClass(:)';	% Force a row vector
other.inputName=inputName;
other.frameTime=frame2sampleIndex(1:frameNum, frameSize, overlap)/au.fs;
other.tPitch=targetPitch;

%% Put everything together for output
au2=au;
au2.feature=feature;
au2.tOutput=frameClass;
au2.other=other;

%% Optional plots of the first two features, raw and normalized
if showPlot
	plotTitle=escapedFile;
	DS.input=feature;
	DS.output=frameClass;
	DS.inputName=other.inputName;
	DS.outputName=vsdOpt.outputName;
	DS.annotation=other.annotation;
	subplot(211); dsScatterPlot(DS); title(['Raw data: ', plotTitle]);
	feature2=inputNormalize(feature(1:2, :));
	DS.input=feature2(1:2, :);
	DS.output=frameClass;
	subplot(212); dsScatterPlot(DS); title(['Normalized data: ', plotTitle]);
end

% ====== Self demo
function selfdemo
% Evaluate the "Example" section parsed from this file's help text.
mObj=mFileParse(which(mfilename));
strEval(mObj.example);