function newWpa=wpaExtract(text, wpa)
% wpaExtract: Extract from a WPA struct array the entries whose words occur in a text
%
%	Usage:
%		newWpa=wpaExtract(text, wpa)
%
%	Input:
%		text: input text. It is passed to textNormalize4english, whose output
%			is indexed here as a cell array of words.
%			(NOTE(review): the exact normalization rules live in that
%			function and are not visible from this file.)
%		wpa: struct array with (at least) a field "word" holding a string.
%
%	Output:
%		newWpa: the elements of wpa whose "word" field exactly matches
%			(strcmp, case-sensitive) at least one word of the normalized
%			text. Elements keep their original order in wpa and each
%			element appears at most once.
%
%	Side effects:
%		For every word of the text that is not found in wpa, a warning line is
%		printed. All such words (one per occurrence) are then appended to
%		"missingWord.txt" in the current directory, one word per line with
%		CR/LF line endings. If that log file cannot be opened, a warning is
%		printed and the extraction result is still returned.
%
%	Calling the function with no input arguments runs a self demo.

if nargin<1, selfdemo; return; end

wpaWordList={wpa.word};
words=textNormalize4english(text);

% Logical masks instead of index vectors grown inside the loop:
% keepMask marks dictionary entries to keep; isMissing marks unmatched words.
keepMask=false(size(wpaWordList));
isMissing=false(1, numel(words));
for i=1:numel(words)
	word=words{i};
	hit=strcmp(word, wpaWordList);		% All dictionary entries equal to this word
	if any(hit)
		keepMask=keepMask | hit;
	else
		fprintf('Warning: Cannot find "%s" in the given dict!\n', word);
		isMissing(i)=true;
	end
end

if any(isMissing)
	logFile='missingWord.txt';
	fprintf('Save the missing words to %s!\n', logFile);
	[fid, errMsg]=fopen(logFile, 'a');
	if fid<0
		% The log is only a convenience; do not lose the extraction result
		% just because the log file cannot be written.
		fprintf('Warning: Cannot open %s for appending (%s)!\n', logFile, errMsg);
	else
		missingWords=words(isMissing);
		% The format string is recycled for each argument: one word per line.
		fprintf(fid, '%s\r\n', missingWords{:});
		fclose(fid);
	end
	pause(0.2);
end

% Logical indexing returns the matching entries once each, in dictionary order.
newWpa=wpa(keepMask);

function selfdemo
% selfdemo: Self demo of wpaExtract
%	Reads a WPA dictionary from a fixed path, extracts the entries for the words
%	of a sample sentence, writes the result to "test.wpa" in the current
%	directory, and displays that file.
%	NOTE(review): the dictionary path is hard-coded to one developer's machine;
%	the demo fails elsewhere unless that file exists.

dictFile='d:/users/jang/application/asr/dict/english.wpa';
outputFile='test.wpa';
sentence='what movies have you seen recently?';

% Load the full dictionary and keep only the entries needed by the sentence
fprintf('Reading %s...\n', dictFile);
fullWpa=wpaRead(dictFile);
subWpa=wpaExtract(sentence, fullWpa);

% Save the reduced dictionary and display it
fprintf('Writing %s...\n', outputFile);
wpaWrite(subWpa, outputFile);
type(outputFile);