%% Tutorial on tone recognition for isolated characters
% In this tutorial, we shall explain the basics of Mandarin tone
% recognition for isolated characters.
% The dataset is available at the course website. (TODO(review): the
% original comment left the URL blank — fill it in.)
%% Preprocessing
% Before we start, let's add the necessary toolboxes to the MATLAB search path:
addpath d:/users/jang/matlab/toolbox/utility
addpath d:/users/jang/matlab/toolbox/machineLearning
%%
% For compatibility, here we list the platform and MATLAB version that we
% used to run this script:
fprintf('Platform: %s\n', computer);
fprintf('MATLAB version: %s\n', version);
scriptStartTime=tic;
%% Dataset construction
% First of all, we shall collect all the recording data from the corpus directory.
audioDir='D:\dataSet\mandarinTone\2013-msar';
auData=recursiveFileList(audioDir, 'wav');
% Use only the first 100 recordings for simplicity. Guard with min() so a
% corpus with fewer than 100 files does not trigger an out-of-range index.
recordingCount=min(100, length(auData));
auData=auData(1:recordingCount);
fprintf('Collected %d recordings...\n', length(auData));
%%
% Since each recording contains 4 tones, we need to perform endpoint
% detection in order to have 4 segments corresponding to these 4 tones.
% The result is cached in auData.mat so that repeated runs of this script
% can skip the (slow) endpoint-detection pass.
fs=16000;	% sampling rate of the corpus recordings, in Hz
if ~exist('auData.mat', 'file')
	tic
	epdPrm=epdPrmSet(fs);
	for i=1:length(auData)
		fprintf('%d/%d, file=%s\n', i, length(auData), auData(i).path);
		aObj=myAudioRead(auData(i).path);
		% Volume-based endpoint detection; we only keep the segment list.
		[~, ~, auData(i).segment]=epdByVol(aObj, epdPrm);
		auData(i).segmentCount=length(auData(i).segment);
		auData(i).obj=aObj;
	end
	toc
	fprintf('Saving auData.mat...\n');
	save auData auData
else
	fprintf('Loading auData.mat...\n');
	load auData.mat
end
%%
% Since our endpoint detection cannot always successfully find these 4
% segments, we can simply remove those recordings which cannot be
% correctly segmented:
keepIndex=[auData.segmentCount]==4;
auData=auData(keepIndex);
fprintf('Keep %d recordings for further analysis\n', length(auData));
%%
% After this step, each recording should have 4 segments corresponding to 4
% tones.
% Then we can perform pitch tracking on these segments:
fprintf('Pitch tracking...\n');
nbits=16;
pfType=1;	% 0 for AMDF, 1 for ACF
ptOpt=ptOptSet(fs, nbits, pfType);
% frameSize is twice that of HTK; frame rate = 100, for use together with
% speech recognition. (Translated from the original Chinese comment.)
ptOpt.frameSize=640;
ptOpt.overlap=640-160;
ptOpt.useVolThreshold=0;
ptOpt.useClarityThreshold=0;
%ptOpt.mainFun='maxPickingOverPf';
for i=1:length(auData)
	fprintf('%d/%d, file=%s\n', i, length(auData), auData(i).path);
	for j=1:length(auData(i).segment)
		% Extract the j-th tone segment from the full recording and track
		% its pitch curve.
		tempObj=auData(i).obj;
		tempObj.signal=tempObj.signal(auData(i).segment(j).beginSample:auData(i).segment(j).endSample);
		auData(i).segment(j).pitch=pitchTrackingForcedSmooth(tempObj, ptOpt);
	end
end
%%
% After pitch tracking, we need to extract features. This is accomplished
% in the following 3 steps:
%
% * Interpolate the original pitch to have a fixed length of 100.
% * Subtract the mean of the interpolated pitch, such that its average value is 0.
% * Use a 3-order polynomial to fit the interpolated pitch, and use the
%   returned 4 coefficients as the features for tone recognition.
fprintf('Feature extraction...\n');
targetLen=100;	% fixed length of the interpolated pitch vector
polyOrder=3;	% polynomial order; yields polyOrder+1 feature coefficients
for i=1:length(auData)
	fprintf('%d/%d, file=%s\n', i, length(auData), auData(i).path);
	for j=1:length(auData(i).segment)
		pitch=auData(i).segment(j).pitch;
		% Resample the pitch curve to targetLen points (linspace without an
		% explicit count defaults to 100, so this matches the original code).
		pitch2=interp1(1:length(pitch), pitch, linspace(1, length(pitch), targetLen));
		pitchNorm=pitch2-mean(pitch2);	% zero-mean so only the contour shape matters
		coef=polyfit(linspace(1, length(pitch), targetLen), pitchNorm, polyOrder);
		auData(i).segment(j).coef=coef(:);
		auData(i).segment(j).pitchNorm=pitchNorm(:);
	end
end
%%
% Once we have all the features for the recordings, we can create the dataset
% for further exploration.
% Pool all segments; each recording contributes its 4 tone segments in
% order, so tone k occupies every 4th entry starting at index k.
segment=[auData.segment];
ds.input=[];
ds.output=[];
for toneId=1:4
	toneData(toneId).segment=segment(toneId:4:end);
	toneCount=length(toneData(toneId).segment);
	ds.input=[ds.input, [toneData(toneId).segment.coef]];
	ds.output=[ds.output, toneId*ones(1, toneCount)];
end
ds.outputName={'tone1', 'tone2', 'tone3', 'tone4'};
ds.inputName={'c1', 'c2', 'c3', 'c4'};
%% Dataset visualization
% With everything stored in the dataset structure "ds", we can invoke many
% functions in the Machine Learning Toolbox for visualization and
% classification.
%%
% For instance, we can display the size of each class:
figure; [classSize, classLabel]=dsClassSize(ds, 1);
%%
% We can plot the distribution of each features within each class:
figure; dsBoxPlot(ds);
%%
% The box plots indicate the ranges of the features vary a lot. To verify,
% we can simply plot the range of features of the dataset:
figure; dsRangePlot(ds);
%%
% Big range difference cause problems in distance-based classification. To
% avoid this, we can simply normalize the features:
ds2=ds;
ds2.input=inputNormalize(ds2.input);
%%
% We can plot the feature vectors within each class:
figure; dsFeaVecPlot(ds); figEnlarge;
%%
% We can do the scatter plots on every 2 features:
figure; dsProjPlot2(ds); figEnlarge;
%%
% It is hard to see the above plots due to a large difference in the range
% of each features. We can try the same plot with normalized inputs:
figure; dsProjPlot2(ds2); figEnlarge;
%%
% We can also do the scatter plots in the 3D space:
figure; dsProjPlot3(ds2); figEnlarge;
%%
% In order to visualize the distribution of the dataset,
% we can project the original dataset into 2-D space.
% This can be achieved by LDA (linear discriminant analysis):
ds2d=lda(ds);
% Keep only the first 2 LDA dimensions for a 2-D scatter plot.
ds2d.input=ds2d.input(1:2, :);
figure; dsScatterPlot(ds2d); xlabel('Input 1'); ylabel('Input 2');
title('Features projected on the first 2 lda vectors');
%% Classification
% We can try the most straightforward KNNC (k-nearest neighbor classifier):
rr=knncLoo(ds);
fprintf('rr=%g%% for ds\n', rr*100);
%%
% For normalized dataset, usually we can obtain a better accuracy:
[rr, computed]=knncLoo(ds2);
fprintf('rr=%g%% for ds2 of normalized inputs\n', rr*100);
%%
% We can plot the confusion matrix:
confMat=confMatGet(ds2.output, computed);
opt=confMatPlot('defaultOpt');
opt.className=ds.outputName;
opt.mode='both';
figure; confMatPlot(confMat, opt); figEnlarge;
%%
% We can perform input selection to find the best features:
figure; tic; inputSelectSequential(ds2, inf, 'knnc', 1); toc
%%
% We can even perform an exhaustive search on the classifiers and the way
% of input normalization:
opt=perfCv4classifier('defaultOpt');
opt.foldNum=10;
tic; [perfData, bestId]=perfCv4classifier(ds, opt, 1); toc
structDispInHtml(perfData, 'Performance of various classifiers via cross validation');
%%
% We can then display the confusion matrix of the best classifier:
confMat=confMatGet(ds.output, perfData(bestId).bestComputedClass);
opt=confMatPlot('defaultOpt');
opt.className=ds.outputName;
figure; confMatPlot(confMat, opt); figEnlarge;
%% Summary
% This is a brief tutorial on Mandarin tone recognition for isolated
% characters based on polynomial features of the pitch contour.
% There are several directions for further improvement:
%
% * Explore other features, such as the normalized pitch contour itself or
%   its statistics
% * Try the classification problem using the whole dataset instead of only
%   the first 100 recordings
% * Use template matching (e.g., DTW on pitch contours) as an alternative
%   to improve the performance
%
%%
% Overall elapsed time:
toc(scriptStartTime)
%%
% NOTE(review): the original footer was an unfilled auto-generated
% placeholder ("created on date"); author and date unknown.