function [speakerSet1, speakerSet2]=sidFeaExtract(sidOpt)
% sidFeaExtract: Feature extraction for speaker identification.
%	Usage: [speakerSet1, speakerSet2]=sidFeaExtract(sidOpt)
%	Reads two wave directories (sidOpt.auDir01/auDir02), extracts features,
%	keeps only speakers common to both sets, optionally mixes/re-splits the
%	two sets, and caches the result in sidOpt.outputDir/speakerSet.mat.
%	For GMM-based methods it also builds DS/TS datasets and applies an
%	optional LDA/PCA feature transformation.
%	NOTE(review): DS/TS are saved to dsts.mat but not returned — presumably
%	consumed by a later stage via that file; confirm against callers.

% ====== Feature extraction
eval(sprintf('tInit_%s=clock;\n', mfilename));	% Start timer; variable name embeds mfilename
sidOpt=sidOptSet;	% NOTE(review): this overwrites the input argument sidOpt — confirm intended
% ====== Generate speakerSet
if ~exist([sidOpt.outputDir, '/speakerSet.mat'])	% Was '\speakerSet.mat': '/' matches the save path below and works cross-platform
	% ====== Read set 1
	speakerSet1=speakerSetRead(sidOpt.auDir01, sidOpt.sentenceNumPerSpeaker, sidOpt.maxSpeakerNum);
	fprintf('Get wave info of %d persons from auDir01=%s\n', length(speakerSet1), sidOpt.auDir01);
	speakerSet1=speakerSetAddFea(speakerSet1, sidOpt);	% Add mfcc to speakerSet1
	% ====== Read set 2
	speakerSet2=speakerSetRead(sidOpt.auDir02, sidOpt.sentenceNumPerSpeaker, sidOpt.maxSpeakerNum);
	fprintf('Get wave info of %d persons from auDir02=%s\n', length(speakerSet2), sidOpt.auDir02);
	speakerSet2=speakerSetAddFea(speakerSet2, sidOpt);	% Add mfcc to speakerSet2
	% ====== Data sync: Remove individuals not in set 1 and 2
	% ====== Delete speaker in set1 but not in set2
	commonName=intersect({speakerSet1.name}, {speakerSet2.name});
	for i=1:length(speakerSet1)
		tag1(i)=~isempty(find(strcmp(speakerSet1(i).name, commonName)));
	end
	speakerSet1=speakerSet1(tag1);
	% ====== Delete speaker in set2 but not in set1
	for i=1:length(speakerSet2)
		tag2(i)=~isempty(find(strcmp(speakerSet2(i).name, commonName)));
	end
	speakerSet2=speakerSet2(tag2);
	% ====== Double check
	name1={speakerSet1.name};
	name2={speakerSet2.name};
	if ~all(strcmp(name1, name2)), error('Sync is not done yet!'); end
	% ====== Mix datasets 1 and 2 if necessary
	if sidOpt.mixData==1
		% Merge speakerSet1 and speakerSet2 for training/test to verify the approach works
		speakerSet=speakerSetMerge(speakerSet1, speakerSet2);
		fprintf('Number of total wave files: %d\n', length(speakerSet));
		% ====== Split the speakerSet into training (speakerSet1) and test data (speakerSet2)
		speakerNum=length(speakerSet);
		speakerSet1=speakerSet;
		speakerSet2=speakerSet;
		for i=1:speakerNum
			speakerSet1(i).sentence=speakerSet1(i).sentence(1:2:end);	% Odd-indexed utterances as the training set
			speakerSet2(i).sentence=speakerSet2(i).sentence(2:2:end);	% Even-indexed utterances as the test set
		end
	end
	fprintf('Saving %s/speakerSet.mat...\n', sidOpt.outputDir);
	eval(sprintf('save %s/speakerSet speakerSet1 speakerSet2', sidOpt.outputDir));
else
	fprintf('Loading %s/speakerSet.mat...\n', sidOpt.outputDir);
	eval(sprintf('load %s/speakerSet.mat', sidOpt.outputDir));
	% === Keep only necessary sentences
	% NOTE(review): this loop body was garbled in the recovered source (text
	% between "<" and ">" was lost); reconstructed as "truncate each speaker
	% to at most sentenceNumPerSpeaker sentences" — confirm against the
	% original repository before relying on it.
	for i=1:length(speakerSet1)
		if sidOpt.sentenceNumPerSpeaker<length(speakerSet1(i).sentence)
			speakerSet1(i).sentence=speakerSet1(i).sentence(1:sidOpt.sentenceNumPerSpeaker);
			speakerSet2(i).sentence=speakerSet2(i).sentence(1:sidOpt.sentenceNumPerSpeaker);
		end
	end
end
% ====== Report elapsed time for feature extraction
% NOTE(review): the original format string was partially lost; only
% "%g seconds\n', mfilename, time);" survived — message text reconstructed.
eval(sprintf('time=etime(clock, tInit_%s);', mfilename));
fprintf('Elapsed time of %s = %g seconds\n', mfilename, time);
if strcmp(sidOpt.method, 'dtw'), return; end	% The following operations are not for DTW
% ====== Put the raw feature data into DS and TS, for GMM only
% NOTE(review): the condition of this "if" was garbled in the recovered
% source; reconstructed as "rebuild when speakers were capped or no cached
% dsts.mat exists" — confirm against the original repository.
if sidOpt.maxSpeakerNum<inf | ~exist([sidOpt.outputDir, '/dsts.mat'])
	fprintf('Prepare DS ===> ');
	tic, DS=speakerSet2ds(speakerSet1); fprintf('%.2f seconds\n', toc);
	clear speakerSet1
	fprintf('Prepare TS ===> ');
	tic, TS=speakerSet2ds(speakerSet2); fprintf('%.2f seconds\n', toc);
	clear speakerSet2
	fprintf('Saving output/mat/dsts.mat...\n');
	eval(sprintf('save %s/dsts.mat DS TS', sidOpt.outputDir));
else
	clear speakerSet1 speakerSet2
	fprintf('Loading %s/dsts.mat...\n', sidOpt.outputDir);
	eval(sprintf('load %s/dsts.mat', sidOpt.outputDir));
end
% ====== Feature transformation, for GMM only
switch (sidOpt.feaTransformMode)
	case 1	% LDA
	%	dim=size(DS.input, 1);
		dim=sidOpt.transformedFeaDim;
		[DS, transformVec]=lda(DS, dim);
		TS.input = transformVec'*TS.input;
	case 2	% PCA
	%	dim=size(DS.input, 1);
		dim=sidOpt.transformedFeaDim;
		[DS, transformVec]=pca(DS, dim);
		TS.input = transformVec'*TS.input;
end