% Performance evaluation: collect pairwise distances (dtw1, dtw2, distLinScaling)
% into a dataset DS for subsequent classifier training.
load speakerData.mat
sidPrm=sidPrmSet;
sentenceNum=length([speakerData2.sentence]);
% ====== Clear previously collected dataset fields
for p=1:length(speakerData1)
	for q=1:length(speakerData1(p).sentence)
		speakerData1(p).sentence(q).dsInput=[];
		speakerData1(p).sentence(q).dsOutput=[];
	end
end
DS.input=[];
DS.output=[];
% ====== Speaker ID by dtw1, dtw2, distLinScaling
for i=1:length(speakerData2)
	tInit=clock;
	name=speakerData2(i).name;
	fprintf('%d/%d: speaker=%s\n', i, length(speakerData2), name);
	for j=1:length(speakerData2(i).sentence)
	%	fprintf('\tsentence=%d ==> ', j);
	%	t0=clock;
		inputSentence=speakerData2(i).sentence(j);
		for p=1:length(speakerData1)
			for q=1:length(speakerData1(p).sentence)
				% === Collect DS.input: three distance measures between the input utterance and each reference utterance
				k=size(speakerData1(p).sentence(q).dsInput, 2)+1;
				speakerData1(p).sentence(q).dsInput(1, k)=dtw1(inputSentence.fea, speakerData1(p).sentence(q).fea, 1, 1);
				speakerData1(p).sentence(q).dsInput(2, k)=dtw2(inputSentence.fea, speakerData1(p).sentence(q).fea, 1, 1);
				speakerData1(p).sentence(q).dsInput(3, k)=distLinScaling(inputSentence.fea, speakerData1(p).sentence(q).fea);
			%	speakerData1(p).sentence(q).dsInput(4, k)=dtw1(inputSentence.vol, speakerData1(p).sentence(q).vol, 1, 1);
			%	speakerData1(p).sentence(q).dsInput(5, k)=dtw2(inputSentence.vol, speakerData1(p).sentence(q).vol, 1, 1);
			%	speakerData1(p).sentence(q).dsInput(6, k)=distLinScaling(inputSentence.vol, speakerData1(p).sentence(q).vol);
				% === Collect DS.output: class 2 if the two utterances share the same text, class 1 otherwise
				speakerData1(p).sentence(q).dsOutput(1, k)=1+strcmp(speakerData2(i).sentence(j).text, speakerData1(p).sentence(q).text);
			%	fprintf('q=%d, text1=%s, text2=%s, output=%d\n', q, speakerData2(i).sentence(j).text, speakerData1(p).sentence(q).text, speakerData1(p).sentence(q).dsOutput(1, k)); pause
			end
		end
	%	fprintf('Name = %s, ave. time = %.2f sec\n', speakerData2(i).name, etime(clock, t0)/length(speakerData2(i).sentence));
	end
%	speakerData2(i).correct=[speakerData2(i).sentence.correct];
%	speakerData2(i).rr=sum(speakerData2(i).correct)/length(speakerData2(i).correct);
%	fprintf('\tAve. time = %.2f sec\n', etime(clock, tInit)/length(speakerData2(i).sentence));
end
% ====== Assemble the dataset and drop columns with overflowed (infinite) distances
allSentences=[speakerData1.sentence];
DS.input=cat(2, allSentences.dsInput);
DS.output=cat(2, allSentences.dsOutput);
DS.input(DS.input>2e9)=inf;		% Treat abnormally large distances as infinite
index1=find(isinf(DS.input(1,:)));
index2=find(isinf(DS.input(2,:)));
index=union(index1, index2);
DS.input(:, index)=[];
DS.output(:, index)=[];
%dsScatterPlot(DS);
%dsClassSize(DS, 1);
fprintf('Saving DS.mat...\n');
save DS DS
return	% Stop here; run the classifier sections below manually after DS.mat has been generated.

% ====== Linear classifier
trainPrm=lincTrainPrmSet('method', 'batchLearning', 'animation', 'yes', 'printInterval', 30);
[coef, recogRate]=lincTrain(DS, trainPrm);
fprintf('Recog. rate = %.2f%%\n', 100*recogRate);

% ====== GMM classifier
% Split the dataset into training (odd-indexed columns) and test (even-indexed columns) sets
TS=DS;
DS.input(:, 2:2:end)=[]; DS.output(:, 2:2:end)=[];
TS.input(:, 1:2:end)=[]; TS.output(:, 1:2:end)=[];
[DS.input, mu, sigma]=inputNormalize(DS.input);		% Input normalization for DS
TS.input=inputNormalize(TS.input, mu, sigma);		% Input normalization for TS, using DS statistics
count1=dsClassSize(DS);
count2=dsClassSize(TS);
vecOfGaussianNum=1:min([count1, count2]);
covType=1;
gmmTrainPrm=gmmTrainPrmSet;
gmmTrainPrm.plotOpt=1;
[gmmData, recogRate1, recogRate2]=gmmcTrainEvalWrtGaussianNum(DS, TS, vecOfGaussianNum, covType, gmmTrainPrm);