Home > asr > txtEng2netFile.m

txtEng2netFile

PURPOSE ^

txtEng2netFile: Read an English sentence and convert it to a net file

SYNOPSIS ^

function txtEng2netFile(sentence, netFile, dict)

DESCRIPTION ^

 txtEng2netFile: Read an English sentence and convert it to a net file
    Usage: txtEng2netFile(sentence, netFile, dict)
    If dict is given, generate a sausage net that accounts for words with multiple pronunciations (破音字, i.e. heteronyms).
    If dict is not given, generate a linear net.

    For graphic display of the demo example, please refer to toolbox/asr/doc/net4alignment.ppt

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

function txtEng2netFile(sentence, netFile, dict)
% txtEng2netFile: Read an English sentence and convert it to a net file
%    Usage: txtEng2netFile(sentence, netFile, dict)
%        sentence: English sentence; it is split into words by textNormalize4english
%        netFile: name of the net file to be written
%        dict: (optional) struct array with a field "word"
%
%    If dict is given, generate a sausage net that accounts for words with
%    multiple pronunciations: each word gets one node per matching dictionary
%    entry, named 'word@k', where k is the zero-based position in dict.
%    If dict is not given, generate a linear net.
%    If no input argument is given, the self demo is run.
%
%    The written net has the shape
%        !NULL -> sil -> word1 -> sp -> word2 -> ... -> wordN -> sil -> !NULL
%    Node IDs in the file are zero-based (MATLAB index minus one).
%
%    For graphic display of the demo example, please refer to toolbox/asr/doc/net4alignment.ppt

%    Roger Jang, 20050529

if nargin<1, selfdemo; return; end
if nargin<2
	error('txtEng2netFile requires at least two input arguments: sentence and netFile.');
end

words=textNormalize4english(sentence);
rawWordNum=length(words);
if rawWordNum==0
	% Without this check the interleaving code below fails with an obscure error
	error('No word found in the given sentence!');
end

% Add 'sil' or 'sp' between words (no filler after the last word)
filler='sp';
temp=cell(1, 2*rawWordNum-1);
for i=1:rawWordNum
	temp{2*i-1}=words{i};
	if i<rawWordNum
		temp{2*i}=filler;
	end
end
words=temp;
wordNum=length(words);

nodeNum=wordNum+4;

% ====== Build up linear net structure
% Nodes 1 and 2 are the entry/exit !NULL nodes, nodes 3 and 4 are the leading/trailing sil.
net.nodeNum=nodeNum;
net.node(1).name='!NULL'; net.node(1).fanOut=3;
net.node(2).name='!NULL'; net.node(2).fanOut=[];
net.node(3).name='sil'; net.node(3).fanOut=5;
net.node(4).name='sil'; net.node(4).fanOut=2;
for i=5:nodeNum
	net.node(i).name=words{i-4};
	net.node(i).fanOut=i+1;
	net.node(i).dicIndex=-1;	% -1 means "no dictionary entry attached"
end
net.node(nodeNum).fanOut=4;	% Connect to sil

% If a dictionary is given, find the position(s) of each word in the dictionary,
% so that a sausage net can be built to handle words with multiple pronunciations.
if nargin==3
	dicWords={dict.word};
	for i=5:nodeNum
		% NOTE(review): findCellStr is assumed to return all matching positions,
		% and an empty result when the word is absent -- confirm against the helper.
		index=findCellStr(dicWords, net.node(i).name);
		if isempty(index)
			% An empty index would silently produce a disconnected net below
			error('Word "%s" is not found in the given dictionary!', net.node(i).name);
		end
		net.node(i).dicIndex=index;
	end
	% Handle nodes 3 & 4 (sil)
	net.node(3).dicIndex=findCellStr(dicWords, 'sil');
	net.node(4).dicIndex=findCellStr(dicWords, 'sil');
end

% ====== Build sausage net from linear net
% Each linear node i is expanded into one node per entry of its dicIndex;
% all nodes of the previous word fan out to all nodes of the current word.
snet=net;
prevNodeIdSet=3;
thisNodeId=5;
for i=5:net.nodeNum
	thisNodeIdSet=[];
	for j=1:length(net.node(i).dicIndex)
		dicIndex=net.node(i).dicIndex(j);
		if dicIndex>=0
			snet.node(thisNodeId).name=[net.node(i).name, '@', int2str(dicIndex-1)];
		else
			snet.node(thisNodeId).name=net.node(i).name;
		end
		snet.node(thisNodeId).dicIndex=net.node(i).dicIndex;
		thisNodeIdSet=[thisNodeIdSet, thisNodeId];
		thisNodeId=thisNodeId+1;
	end
	for j=1:length(prevNodeIdSet)
		snet.node(prevNodeIdSet(j)).fanOut=thisNodeIdSet;
	end
	prevNodeIdSet=thisNodeIdSet;
end
for i=1:length(prevNodeIdSet)
	snet.node(prevNodeIdSet(i)).fanOut=4;	% Connect to sil
end

snet.nodeNum=length(snet.node);
snet.linkNum=0;
for i=1:snet.nodeNum
	snet.linkNum=snet.linkNum+length(snet.node(i).fanOut);
end

% ====== Write net file
fid=fopen(netFile, 'w');
if fid<0
	error('Cannot open %s!', netFile);
end
% ====== Write node info
fprintf(fid, 'VERSION=1.0\r\n');
fprintf(fid, 'N=%d\tL=%d\r\n', snet.nodeNum, snet.linkNum);
fprintf(fid, 'I=0\tW=!NULL\r\n');
fprintf(fid, 'I=1\tW=!NULL\r\n');
fprintf(fid, 'I=2\tW=sil\r\n');
fprintf(fid, 'I=3\tW=sil\r\n');
for i=5:snet.nodeNum
	fprintf(fid, 'I=%d\tW=%s\r\n', i-1, snet.node(i).name);
end
% ====== Write link info
% The links leaving node 1 (!NULL -> sil) and node 4 (sil -> !NULL) are written explicitly first.
fprintf(fid, 'J=0\tS=0\tE=2\r\n');
fprintf(fid, 'J=1\tS=3\tE=1\r\n');
J=2;
for i=3:snet.nodeNum
	if i==4		% The link from node 4 to node 2 has already been written as J=1
		continue;
	end
	for j=1:length(snet.node(i).fanOut)
		s=i-1; e=snet.node(i).fanOut(j)-1;
		fprintf(fid, 'J=%d\tS=%d\tE=%d\r\n', J, s, e);
		J=J+1;
	end
end
fclose(fid);
0116 
% ====== Self demo
function selfdemo
% selfdemo: Run one of the demo examples of txtEng2netFile
%    The example to run is selected by exampleId below (2 by default).
%    Each example prints a title, creates a net file for a sentence, and then
%    opens the file with the external editor "uedit32" through dos().
%    An empty dictFile means that no dictionary is passed (linear net).

exampleId=2;
switch exampleId
	case 1
		fprintf('範例一:未給 dict,產生 linear net\n');
		sentence='What is your favorite movie?';
		netFile='test1.net';
		dictFile='';
	case 2
		fprintf('範例二:給定 dict(只包含此句的小字典),產生 sausage net\n');
		sentence='what movies have you seen recently?';
		netFile='test2.net';
		dictFile='d:/users/jang/application/asr/source/data/what_movies_have_you_seen_recently.dic';
	case 3
		fprintf('範例三:給定 dict(完整字典),產生 sausage net\n');
		% Alternative test sentence: 'The tea for you'
		sentence='what movies have you seen recently?';
		netFile='test3.net';
		dictFile='dict/english_sorted.dic';
	case 4
		fprintf('範例四:給定 dict(完整字典),產生 sausage net\n');
		sentence='What are you allergic to?';
		netFile='test4.net';
		dictFile='dict/english_sorted.dic';
end

fprintf('Creating "%s" for "%s"... ', netFile, sentence);
if isempty(dictFile)
	feval(mfilename, sentence, netFile);
else
	dict=dictRead(dictFile);
	feval(mfilename, sentence, netFile, dict);
end
dos(['uedit32 ', netFile]);
fprintf('Done!\n');

Generated on Tue 01-Jun-2010 09:50:19 by m2html © 2003