function hanyuSeq=text2pa4chinese(sentence, wpaList)
% text2pa4chinese: Segment a sentence into dictionary words and return each word with its "pa" entry
%
%	Usage:
%		hanyuSeq=text2pa4chinese(sentence)
%		hanyuSeq=text2pa4chinese(sentence, wpaList)
%
%	Input:
%		sentence: character array to be segmented
%		wpaList: struct array with fields "word" and "pa". When omitted, it is
%			loaded via wpaRead('chinese.hanyu.wpa').
%
%	Output:
%		hanyuSeq: 1xN struct array with fields
%			word: the matched dictionary word
%			pa: the part of the matched entry's "pa" field before the first '#'
%		hanyuSeq is an empty struct array (with the same fields) when no
%		dictionary word is found in the sentence.
%
%	Method:
%		Greedy longest-match scan from left to right. At each position the longest
%		candidate (up to 5 characters) is tried first. Characters that do not start
%		any dictionary word are skipped silently.
%
%	Calling the function without any input argument runs a self demo.

% Always define the output, so that an empty or unmatched sentence returns an
% empty struct array instead of raising "output argument not assigned".
hanyuSeq=struct('word', {}, 'pa', {});

if nargin<1, selfdemo; return; end
if nargin<2, wpaList=wpaRead('chinese.hanyu.wpa'); end
wordList={wpaList.word};

maxWordLen=5;			% Longest candidate (in characters) tried at each position
sentenceLen=length(sentence);

% ====== Greedy longest-match segmentation
outIndex=[];
pos=1;
while pos<=sentenceLen
	found=false;
	% Never try a candidate longer than what is left in the sentence
	for len=min(maxWordLen, sentenceLen-pos+1):-1:1
		str=sentence(pos:pos+len-1);
		index=find(strcmp(wordList, str), 1);	% First matching dictionary entry only
		if ~isempty(index)
			outIndex(end+1)=index;
			pos=pos+len;
			found=true;
			break;
		end
	end
	if ~found
		pos=pos+1;	% No dictionary word starts here; skip this character
	end
end

% ====== Collect the matched entries
for i=1:length(outIndex)
	entry=wpaList(outIndex(i));
	hanyuSeq(i).word=entry.word;
	temp=split(entry.pa, '#');	% Keep only the part before the first '#'
	hanyuSeq(i).pa=temp{1};
end
0050
0051
function selfdemo
% selfdemo: Run text2pa4chinese on three built-in sentences and print each input and result
%	For every sentence, the "pa" fields of the returned struct array are joined
%	with '#' and printed after the original sentence.
%	NOTE(review): join is assumed to return a char array usable with %s - confirm
%	which join (toolbox utility or MATLAB built-in) is on the path.
demoSentences={'我到廟口吃小吃', '我們三人參加會議', '「海角七號」的茂伯是國寶ㄟ!'};
for k=1:length(demoSentences)
	sentence=demoSentences{k};
	hanyuSeq=text2pa4chinese(sentence);
	fprintf('Input = %s\n', sentence);
	fprintf('Output = %s\n', join({hanyuSeq.pa}, '#'));
end
0065