%% Tutorial on coin recognition
% This tutorial explains the basics of coin recognition based on the sound
% produced when a coin is dropped to the ground.

%% Preprocessing
% Before we start, let's add the necessary toolboxes to the search path of MATLAB:
toolboxRoot='d:/users/jang/matlab/toolbox';	% Change this to where the toolboxes are installed
addpath([toolboxRoot, '/utility']);
addpath([toolboxRoot, '/sap']);
addpath([toolboxRoot, '/machineLearning']);
%%
% All the above toolboxes can be downloaded from the author's toolbox page.
% Make sure you are using the latest toolboxes to work with this script.
%%
% For compatibility, here we list the platform and MATLAB version that we used to run this script:
fprintf('Platform: %s\n', computer);
fprintf('MATLAB version: %s\n', version);
fprintf('Script starts at %s\n', char(datetime));
scriptStartTime=tic;	% Timing for the whole script

%% Dataset collection
% First of all, we can collect all the sound files. The dataset can be found at <../coinSound.rar this link>.
% We can use the command "mmDataCollect" to collect all the file information:
auDir='coinSound';
opt=mmDataCollect('defaultOpt');
opt.extName='wav';
auSet=mmDataCollect(auDir, opt, 1);
%%
% We need to perform feature extraction and put all the dataset into a format that is easier for further processing,
% including classifier construction and evaluation.
myTic=tic;
% NOTE: The caching logic below (reuse ds.mat if it already exists) is intentionally
% left disabled, so features are always re-extracted and ds.mat is always rewritten.
%if ~exist('ds.mat', 'file')
	opt=dsCreateFromMm('defaultOpt');
	opt.auFeaFcn=@auFeaMfcc;	% Function for feature extraction
	opt.auEpdOpt.method='vol';
	%opt.auEpdOpt.volRatio=0.02;	% To have the right EPD, but it doesn't help recognition!
	ds=dsCreateFromMm(auSet, opt);
	fprintf('Saving ds.mat...\n'); save ds ds
%else
%	fprintf('Loading ds.mat...\n'); load ds.mat
%end
fprintf('time=%g sec\n', toc(myTic));
%%
% Now all the frame-based features are extracted and stored in "ds".
% Next we can try to plot the extracted features for each class:
figure; dsFeaVecPlot(ds);

%% Performance evaluation
% Now we want to do performance evaluation on LOFOCV (leave-one-file-out cross validation),
% where each file is a recording of a complete sound event. LOFOCV proceeds as follows:
opt=perfLoo4audio('defaultOpt');
[ds2, fileRr, frameRr]=perfLoo4audio(ds, opt);
fprintf('Frame-based leave-one-file-out RR=%g%%\n', frameRr*100);
fprintf('File-based leave-one-file-out RR=%g%%\n', fileRr*100);
%%
% We can plot the frame-based confusion matrix:
confMat=confMatGet(ds2.output, ds2.frameClassIdPredicted);
confOpt=confMatPlot('defaultOpt');
confOpt.className=ds.outputName;
figure; confMatPlot(confMat, confOpt);
%%
% We can also plot the file-based confusion matrix:
confMat=confMatGet(ds2.fileClassId, ds2.fileClassIdPredicted);
confOpt=confMatPlot('defaultOpt');
confOpt.className=ds.outputName;
figure; confMatPlot(confMat, confOpt);
%%
% We can also list all the misclassified sounds in a table:
for i=1:length(auSet)
	% Map the predicted class index of file i back to its class name
	auSet(i).classPredicted=ds.outputName{ds2.fileClassIdPredicted(i)};
end
mmDataList(auSet);

%% Dimensionality reduction
% In order to visualize the distribution of the dataset, we need to project
% the original dataset into 2-D space. This can be achieved by LDA (linear discriminant analysis):
ds2d=lda(ds);
ds2d.input=ds2d.input(1:2, :);	% Keep only the first two projected dimensions
figure; dsScatterPlot(ds2d); xlabel('Input 1'); ylabel('Input 2');
title('MFCC projected on the first 2 lda vectors');
%%
% As can be seen from the scatter plot, the overlap between "10" and "50"
% is the largest among all class pairs, indicating that these two classes are likely to be confused with each other.
% This is also verified by the confusion matrices shown earlier.
%%
% Actually it is possible to do LDA projection and obtain the corresponding
% accuracies vs. dimensionalities via leave-one-out cross validation over KNNC:
opt=ldaPerfViaKnncLoo('defaultOpt');
opt.mode='exact';
recogRate1=ldaPerfViaKnncLoo(ds, opt);
ds2=ds; ds2.input=inputNormalize(ds2.input);	% input normalization (ds2 is reused here; the LOFOCV result above is no longer needed)
recogRate2=ldaPerfViaKnncLoo(ds2, opt);
featureNum=size(ds.input, 1);	% Number of features = number of rows of the input matrix
figure;	% Use a new figure so that the LDA scatter plot above is not overwritten
plot(1:featureNum, 100*recogRate1, 'o-', 1:featureNum, 100*recogRate2, '^-'); grid on
legend('Raw data', 'Normalized data', 'location', 'southeast');
xlabel('No. of projected features based on LDA');
ylabel('LOO recognition rates using KNNC (%)');
%%
% We can also perform input selection to reduce dimensionality:
myTic=tic;
% NOTE(review): no "figure" is created here; presumably inputSelectSequential draws into
% the current figure when its last argument is 1 -- confirm against the toolbox.
z=inputSelectSequential(ds, inf, [], [], 1); figEnlarge;
toc(myTic)
%%
% It seems the feature selection is not very effective since the accuracy
% is the best when all the inputs are selected.
%%
% After dimensionality reduction, we can perform all combinations of
% classifiers and input normalization to search the best performance
% via leave-one-out cross validation:
myTic=tic;
poOpt=perfCv4classifier('defaultOpt');
poOpt.foldNum=inf;	% Leave-one-out cross validation
figure; [perfData, bestId]=perfCv4classifier(ds, poOpt, 1);
toc(myTic)
structDispInHtml(perfData, 'Performance of various classifiers via cross validation');
%%
% Then we can display the confusion matrix corresponding to the best classifier and the best input normalization scheme:
confMat=confMatGet(ds.output, perfData(bestId).bestComputedClass);
confOpt=confMatPlot('defaultOpt');
confOpt.className=ds.outputName;
figure; confMatPlot(confMat, confOpt);
%%
% We can also repeat LOFOCV with the classifier set to 'qc', using the
% default options returned by its training function "qcTrain":
opt=perfLoo4audio('defaultOpt');
opt.classifier='qc';
opt.classifierOpt=feval([opt.classifier, 'Train'], 'defaultOpt');
[ds2, fileRr, frameRr]=perfLoo4audio(ds, opt);
fprintf('Frame-based leave-one-file-out RR=%g%%\n', frameRr*100);
fprintf('File-based leave-one-file-out RR=%g%%\n', fileRr*100);

%% Summary
% This is a brief tutorial which uses the basic techniques in pattern recognition.
% There are several directions for further improvement:
%
% * Explore other features (such as magnitude spectrum)
% * Verify that endpoint detection has been performed correctly on each recording
% * Use other classifiers
%

%% Appendix
% List of functions and datasets used in this script
%
% * <../list.asp List of files in this folder>
%
%%
% Date and time when finishing this script:
fprintf('Date & time: %s\n', char(datetime));
%%
% Overall elapsed time:
toc(scriptStartTime)
%%
% .