function waveData = getWaveData(speaker, waveDir)
% getWaveData: Collect wave-file records for one or more speakers.
%
%   Usage:
%       waveData = getWaveData(speaker, waveDir)
%
%   Input:
%       speaker: cell array of speaker folder names located under waveDir.
%                If the first element is 'all', every sub-folder of waveDir
%                is used as a speaker. A plain char array is accepted and
%                treated as a one-element cell array.
%       waveDir: root folder that contains one sub-folder per speaker.
%
%   Output:
%       waveData: column struct array obtained by stacking the results of
%                 getSpeakerWaveData for each speaker, or [] when nothing
%                 was found.

if nargin<1, speaker={'張智星'}; end
if nargin<2, waveDir = 'D:\dataset\asr_hmm\唐詩'; end

% Accept a bare string so that getWaveData('all') works like getWaveData({'all'}).
if ischar(speaker), speaker={speaker}; end

if ~isempty(speaker) && strcmp(speaker{1}, 'all')
    entries=dir(waveDir);
    if isempty(entries), waveData=[]; return; end
    % Keep folders only, then drop '.' and '..' by NAME. Their position in
    % the listing is not guaranteed (names such as '!x' sort before '.',
    % and drive roots on Windows list no dot entries at all).
    entries=entries([entries.isdir]);
    folderNames={entries.name};
    isDotEntry=strcmp(folderNames, '.') | strcmp(folderNames, '..');
    speaker=folderNames(~isDotEntry);
end

waveData=[];
for i=1:length(speaker)
    fprintf('Reading data from %s... ', speaker{i});
    thisWaveData=getSpeakerWaveData(speaker{i}, waveDir);
    if isempty(thisWaveData)
        % An empty dir() result lacks the extra fields added per file, so
        % replace it with [] to keep the concatenation below valid.
        thisWaveData=[];
    end
    fprintf('%d wave files.\n', length(thisWaveData));
    waveData=[waveData; thisWaveData];
end
0029
0030
function waveData = getSpeakerWaveData(speaker, waveDir)
% getSpeakerWaveData: List the .wav files of one speaker and attach label info.
%
%   Usage:
%       waveData = getSpeakerWaveData(speaker, waveDir)
%
%   Input:
%       speaker: name of the speaker's sub-folder under waveDir (string).
%       waveDir: root folder; one trailing '/' or '\' is ignored.
%
%   Output:
%       waveData: struct array returned by dir() for <waveDir>/<speaker>/*.wav,
%                 with these fields added to every element:
%                   speaker  - the speaker argument
%                   path     - full path of the wave file
%                   text     - label of the file (see below)
%                   errorMsg - initialised to ''
%                 When no file matches, the untouched (empty) dir() result
%                 is returned.
%
%   The label is taken from answer.txt in the speaker folder when that file
%   exists (via getAnswer). Otherwise it is derived from the file name: the
%   extension is removed, everything from the first '%' or '_' onwards is
%   dropped, and trailing '#' characters are stripped.

if nargin<1, speaker='張智星'; end
if nargin<2, waveDir = 'd:\dataset\cbmr\哼唱的歌\'; end

% Drop one trailing separator. A lone '/' or '\' is kept so that a root
% folder does not silently turn into a path relative to the current folder.
if length(waveDir)>1 && (waveDir(end)=='/' || waveDir(end)=='\')
    waveDir = waveDir(1:end-1);
end

% fullfile inserts the platform's separator instead of a hard-coded '\'.
speakerDir = fullfile(waveDir, speaker);
waveData = dir(fullfile(speakerDir, '*.wav'));

% The answer file is the same for every wave file, so locate it once.
answerFile = fullfile(speakerDir, 'answer.txt');
hasAnswerFile = (exist(answerFile, 'file')==2);

for i=1:length(waveData)
    waveData(i).speaker = speaker;
    waveData(i).path = fullfile(speakerDir, waveData(i).name);
    if hasAnswerFile
        % answer.txt overrides any label derived from the file name.
        waveData(i).text = getAnswer(waveData(i).name, answerFile);
    else
        [unusedDir, label] = fileparts(waveData(i).name);
        % Cut at the first '%' or '_', whichever comes first.
        cutAt = find(label=='%' | label=='_', 1);
        if ~isempty(cutAt)
            label = label(1:cutAt-1);
        end
        % Strip trailing '#'; the emptiness check avoids indexing an empty
        % string when the name starts with '%'/'_' or is made of '#' only.
        while ~isempty(label) && label(end)=='#'
            label = label(1:end-1);
        end
        waveData(i).text = label;
    end
    waveData(i).errorMsg = '';
end
0080
0081
0082
function text=getAnswer(waveFileName, answerFile)
% getAnswer: Look up the label of a wave file in an answer file.
%
%   Input:
%       waveFileName: wave file name including its extension, e.g. 'a01.wav'.
%       answerFile:   text file with two whitespace-separated columns per
%                     line: file name without extension, then its label.
%
%   Output:
%       text: label stored for the file. When the file is listed more than
%             once, the first entry is used.
%
%   Raises:
%       getAnswer:notFound when answerFile has no entry for the wave file.

[fileName, answerText]=textread(answerFile, '%s\t%s');
[unusedDir, key]=fileparts(waveFileName);
% Exact, case-sensitive match using the built-in strcmp.
index=find(strcmp(fileName, key));
if isempty(index)
    % Without this check the cell indexing below fails with an error that
    % names neither the wave file nor the answer file.
    error('getAnswer:notFound', 'No entry for "%s" in answer file "%s".', key, answerFile);
end
text=answerText{index(1)};