Home > asr > voiceCommand.m

voiceCommand

PURPOSE ^

waveRecog: Wave assessment which generates a CM (confidence measure) file (and a pitch file, if necessary) from a given wave file and the corresponding text

SYNOPSIS ^

function [answer, cmObj, dosCmd, time, exeStatus, exeResult]=waveRecog(waveFile, textFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt)

DESCRIPTION ^

 waveRecog: Wave assessment which generates a CM (confidence measure) file (and a pitch file, if necessary) from a given wave file and the corresponding text
    Usage: [answer, cmObj, dosCmd, time, exeStatus, exeResult]=waveRecog(waveFile, textFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt)
        waveFile: input wave file
        textFile: input text to be aligned (or a file containing the text)
        language: a string for language option, or a structure for recog. parameters
        cmFile: output CM file ([] if don't care)
        labFile: output lab file ([] if don't care)
        pitchFile: output pitch file ([] if don't care)
        plpFile: output plp file ([] if don't care)
        plotOpt: plot option

        For example:
            mainName='但使龍城飛將在';
            waveFile=[mainName, '.wav'];
            txtFile=which('tangPoem.txt');        % Must be absolute path
            language='chinese';
            cmFile=[mainName, '.cm'];
            labFile=[mainName, '.lab'];
            plpFile=[mainName, '.plp'];
            pitchFile=[mainName, '.pitch'];
            plotOpt=1;
            [answer, cmObj, dosCmd, time, exeStatus, exeResult]=waveRecog(waveFile, txtFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function [answer, cmObj, dosCmd, time, exeStatus, exeResult]=waveRecog(waveFile, textFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt)
0002 % waveRecog: Wave assessment which generates a CM (confidence measure) file (and a pitch file, if necessary) from a given wave file and the corresponding text
0003 %    Usage: [answer, cmObj, dosCmd, time, exeStatus, exeResult]=waveRecog(waveFile, textFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt)
0004 %        waveFile: input wave file
0005 %        textFile: input text to be aligned (or a file containing the text)
0006 %        language: a string for language option ('chinese' or 'english'), or a structure for recog. parameters
0007 %        cmFile: output CM file ([] if don't care)
0008 %        labFile: output lab file ([] if don't care)
0009 %        plpFile: output plp file ([] if don't care)
0010 %        pitchFile: output pitch file ([] if don't care)
0011 %        plotOpt: plot option
0012 %        answer: text read from output/recogResult.txt written by the recognizer
0013 %        cmObj: result of cmRead on output/phone.cm
0014 %        dosCmd, time: the command line that was executed, and its elapsed time (tic/toc around the dos call)
0015 %        exeStatus, exeResult: the two outputs of dos(dosCmd)
0016 %
0017 %        For example:
0018 %            mainName='但使龍城飛將在';
0019 %            waveFile=[mainName, '.wav'];
0020 %            txtFile=which('tangPoem.txt');        % Must be absolute path
0021 %            language='chinese';
0022 %            cmFile=[mainName, '.cm'];
0023 %            labFile=[mainName, '.lab'];
0024 %            plpFile=[mainName, '.plp'];
0025 %            pitchFile=[mainName, '.pitch'];
0026 %            plotOpt=1;
0027 %            [answer, cmObj, dosCmd, time, exeStatus, exeResult]=waveRecog(waveFile, txtFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt)
0028 
0029 %    Roger Jang, 20070103, 20070405
0030 
0031 if nargin<1, selfdemo; return; end
0032 if nargin<4, cmFile=[]; end
0033 if nargin<5, labFile=[]; end
0034 if nargin<6, plpFile=[]; end
0035 if nargin<7, pitchFile=[]; end
0036 if nargin<8, plotOpt=0; end
0037 
0038 debug=0;    % Set to 1 to print the command/outputs and to keep the temporary files
0039 
0040 % ===== Create text file if necessary
0041 needClean=0;
0042 if exist(textFile)~=2        % textFile is actually the text to be matched
0043     tempTxtFile=[tempname, '.txt'];
0044     fid=fopen(tempTxtFile, 'w'); fprintf(fid, '%s\n', textFile); fclose(fid);
0045     textFile=tempTxtFile;
0046     needClean=1;
0047 end
0048 
0049 % ====== Resample the wave file if necessary
0050 tempWaveFile=[tempname, '.wav'];
0051 wave2wave(waveFile, tempWaveFile, 16000, 16);    % Convert to 16KHz, 16Bits
0052 
0053 % ====== Find the executable
0054 parentDir=fileparts(which(mfilename));    % Only the directory is needed; the 4-output form of fileparts is no longer supported
0055 exeDir=[parentDir, '\exe'];
0056 executable=[exeDir, '\recog.exe'];
0057 if ~exist(executable)
0058     error('Cannot find %s!\n', executable);
0059 end
0060 
0061 % ====== Create rp if necessary
0062 if ischar(language)    % language='chinese' or 'english'
0063     switch (language)
0064         case 'chinese'
0065             % Hanyu Pinyin (tcc300.mac)
0066             rp.file=[exeDir, '/chinese.vc.hanyu.prm'];
0067             rp.qiYinFile=[exeDir, '/dict/hanyu.qiYin'];
0068         %    % Chang Gung Pinyin (taihua.mac)
0069         %    rp.file=[exeDir, '/chinese1.asr.prm'];
0070         %    rp.qiYinFile=[exeDir, '/dict/taihua.qiYin'];
0071         case 'english'
0072             rp.file='recogParamEnglish.txt';
0073         otherwise
0074             error('Unknown language!');
0075     end
0076     rp.useEpd=0;
0077     rp.outputDir='output';
0078     rp.sylFile='';
0079     rp.netFile='';
0080     rp.wpaFile='';
0081     rp.getPitch=0;
0082 else
0083     rp=language;    % Caller supplied the recognition parameters as a structure
0084 end
0085 
0086 if ischar(pitchFile) && ~isempty(pitchFile)    % A pitch file was requested ([] and NaN both mean "don't care")
0087     rp.getPitch=1;
0088 end
0089 
0090 % ====== Execute the executable
0091 dosCmd=sprintf('%s "%s" "%s" "%s" %d "%s" "%s" "%s" "%s" %d', executable, rp.file, tempWaveFile, textFile, rp.useEpd, rp.outputDir, rp.sylFile, rp.netFile, rp.wpaFile, rp.getPitch);
0092 tic;
0093 currDir=pwd; cd(exeDir);    % Run from exeDir; the outputs are read back from exeDir/output below
0094 [exeStatus, exeResult]=dos(dosCmd);
0095 cd(currDir);
0096 time=toc;
0097 if debug
0098     fprintf('dosCmd=%s\n', dosCmd);
0099     fprintf('exeStatus=%d\n', exeStatus);
0100     fprintf('exeResult=%s\n', exeResult);
0101 end
0102 
0103 % ====== Create the CM file if necessary
0104 origCmFile=[exeDir, '/output/phone.cm'];
0105 if ~exist(origCmFile)
0106     error('Cannot find %s!\n', origCmFile);
0107 end
0108 cmObj=cmRead(origCmFile);
0109 if ischar(cmFile) && ~isempty(cmFile)
0110     [status, message]=copyfile(origCmFile, cmFile, 'f');
0111     if status~=1
0112         error('Error: %s\n', message);    % Pass message as the %s argument so the copy failure reason is reported
0113     end
0114 end
0115 
0116 % ====== Create the LAB file if necessary
0117 origLabFile=[exeDir, '/output/phone.lab'];
0118 if ~exist(origLabFile)
0119     error('Cannot find %s!\n', origLabFile);
0120 end
0121 if ischar(labFile) && ~isempty(labFile)
0122     [status, message]=copyfile(origLabFile, labFile, 'f');
0123     if status~=1
0124         error('Error: %s\n', message);
0125     end
0126 end
0127 
0128 % ====== Create the plp file if necessary
0129 origPlpFile=[exeDir, '/output/phone.plp'];
0130 if ~exist(origPlpFile)
0131     error('Cannot find %s!\n', origPlpFile);
0132 end
0133 if ischar(plpFile) && ~isempty(plpFile)
0134     [status, message]=copyfile(origPlpFile, plpFile, 'f');
0135     if status~=1
0136         error('Error: %s\n', message);
0137     end
0138 end
0139 
0140 % ====== Create the pitch file if necessary
0141 if ischar(pitchFile) && ~isempty(pitchFile)
0142     origPitchFile=[exeDir, '/output/pitch.txt'];
0143     if ~exist(origPitchFile)
0144         error('Cannot find %s!\n', origPitchFile);
0145     end
0146     [status, message]=copyfile(origPitchFile, pitchFile, 'f');
0147     if status~=1
0148         error('Error: %s\n', message);
0149     end
0150 end
0151 
0152 % ====== Read the final score
0153 fid=fopen([exeDir, '/output/finalScore.txt'], 'r'); finalScore=fscanf(fid, '%f'); fclose(fid);    % NOTE: finalScore is read but neither used nor returned
0154 % ====== Read the recognition result
0155 fid=fopen([exeDir, '/output/recogResult.txt'], 'r'); answer=native2unicode(fscanf(fid, 'Text=%s')); fclose(fid);
0156 
0157 % ====== Plotting if necessary (uses the caller-supplied cmFile/labFile/pitchFile; presumably they must be given when plotOpt is set -- TODO confirm)
0158 if plotOpt
0159     if ~rp.getPitch
0160         waveCmPlot(tempWaveFile, cmFile);
0161     else
0162         [y, fs, nbits]=wavRead(tempWaveFile);
0163         frameSize=640; overlap=480;
0164         pitchObj.frameRate=fs/(frameSize-overlap);
0165         pitchObj.signal=asciiRead(pitchFile);
0166 
0167         pitchObj2=pitchObj;
0168         pitchedIndex1=lab2pitchedIndex(labFile, rp.qiYinFile);    % voiced segment
0169         volume=frame2volume(buffer2(y, frameSize, overlap));
0170         pitchedIndex2=find(volume>max(volume)/10);        % volume is big
0171         pitchedIndex=intersect(pitchedIndex1, pitchedIndex2);    % based on both voiced segment and volume
0172         pitch=0*pitchObj2.signal;
0173         pitch(pitchedIndex)=pitchObj.signal(pitchedIndex);
0174         pitchObj2.signal=pitch;
0175 
0176         waveCmPitchPlot(tempWaveFile, cmFile, pitchObj, pitchObj2);
0177         legend('Pitch: whole', 'Pitch2: segmented');
0178     end
0179 end
0180 
0181 % ====== Delete temporary files
0182 if ~debug
0183     delete(tempWaveFile);    % Delete the temporary file (otherwise too many files pile up in the temp directory and slow execution down)
0184     if needClean
0185         delete(tempTxtFile);    % Delete the temporary file (otherwise too many files pile up in the temp directory and slow execution down)
0186     end
0187 end
0188 
0189 % ====== selfdemo
0190 function selfdemo
0191 % selfdemo: Run the Chinese example through the main function of this file and print the recognizer's output
0192 % === Chinese example (the other sample utterances are kept as commented-out alternatives)
0193 tic
0194 %mainName='但使龍城飛將在';
0195 %mainName='一片孤城萬仞山';
0196 mainName='二十四橋明月夜';
0197 waveFile=[mainName, '.wav'];
0198 txtFile=which('tangPoem.txt');        % Must be absolute path; resolved on the MATLAB path instead of a machine-specific location
0199 if isempty(txtFile), error('Cannot find tangPoem.txt on the MATLAB path!'); end
0200 language='chinese';
0201 cmFile=[mainName, '.cm'];
0202 labFile=[mainName, '.lab'];
0203 plpFile=[mainName, '.plp'];
0204 pitchFile=[mainName, '.pitch'];
0205 plotOpt=1;
0206 [answer, cmObj, dosCmd, time, exeStatus, exeResult]=feval(mfilename, waveFile, txtFile, language, cmFile, labFile, plpFile, pitchFile, plotOpt);
0207 toc
0208 fprintf('exeResult=%s\n', exeResult);
0209 
0210 return    % Everything below this line is never executed
0211 figure
0212 % === English example (NOTE(review): with plotOpt=1 and a pitch file the main function's pitch plot reads rp.qiYinFile, which the 'english' setting does not define -- check before enabling)
0213 tic
0214 mainName='what_would_you_like_to_know';
0215 waveFile=[mainName, '.wav'];
0216 text=strrep(mainName, '_', ' ');
0217 language='english';
0218 cmFile=[mainName, '.cm'];
0219 pitchFile=[mainName, '.pitch'];
0220 plpFile=[mainName, '.plp'];
0221 plotOpt=1;
0222 feval(mfilename, waveFile, text, language, cmFile, [], plpFile, pitchFile, plotOpt);    % [] = no lab file; arguments follow the main function's parameter order
0223 toc

Generated on Tue 01-Jun-2010 09:50:19 by m2html © 2003