我們可以使用 WSH 來對 Google 進行查詢,並傳回相關結果,例如 原始檔(googleQuery.js):(灰色區域按兩下即可拷貝)query = "孤掌難鳴"; url = "http://www.google.com.tw/search?hl=zh-TW&q="+query+"&meta="; objHttp = new ActiveXObject("Microsoft.XMLHTTP"); objHttp.open("GET", url, false, ""); objHttp.send(); content = objHttp.responseText; //WScript.Echo(content); re = new RegExp("約有<b>(.*?)</b>項符合", ""); found = content.match(re); count=RegExp.$1; WScript.Echo(count); 在上述範例中,我們可以查詢「孤掌難鳴」此成語的出現次數。 此外,我們也可以模仿 Google Fight,找出每一個中文詞的詞頻: 原始檔(googleFight.js):(灰色區域按兩下即可拷貝)// Example usage: cscript googleFight.js 三隻小豬 一窮二白 鶼鰈情深 亡鈇意鄰 // Roger Jang, 20081215, tested under Vista function googleQuery(term){ // WScript.Echo("Querying Google about " + term + "..."); var url = "http://www.google.com.tw/search?hl=zh-TW&q="+term+"&meta="; var objHttp = new ActiveXObject("Microsoft.XMLHTTP"); objHttp.open("GET", url, false, ""); objHttp.send(); content = objHttp.responseText; var re = new RegExp("約有<b>(.*?)</b>項符合", ""); var found = content.match(re); var count=RegExp.$1; return(count); } args=WScript.Arguments; if (args.Count()==0){ WScript.Echo("Usage: " + WScript.ScriptName + " term1 term2 term3 ..."); WScript.Quit(); } for (i=0; i<args.length; i++) WScript.Echo(args(i) + " ===> " + googleQuery(args(i))); 欲測試上述範例,可以在 DOS 視窗下達命令如下: cscript googleFight.js 三隻小豬 一窮二白 鶼鰈情深 亡鈇意鄰 此外,我們也可以借用中研院的 web service 來進行中文的斷詞與詞性分析: 原始檔(sinica.js):(灰色區域按兩下即可拷貝)function seg(sentence) { // get sn url = "http://mt.iis.sinica.edu.tw/cgi-bin/text.cgi?query="+sentence; var http = WScript.CreateObject("Microsoft.XMLHTTP"); http.open("GET", url, false); http.send(); var html = http.responseText; var re = /pool\/(\d+)\.html/; var sn = re.exec(html)[1]; // use this page http://mt.iis.sinica.edu.tw/uwextract/pool/{sn}.tag.txt // convert encoding by php url = "http://exa.zibox.cc/~zi/mt/sinica/en.php?sn="+sn; http.open("GET", url, false); http.send(); html = http.responseText; // parse tokens var rew = / ([^(]+)\((\w+)\)/g; var ws = html.match(rew); var res = 
new Array(); for(var i=0; i<ws.length; ++i) { var w = new Object(); w.cword = ws[i].replace(rew, "$1"); w.cpos = ws[i].replace(rew, "$2"); res.push(w); } return res; } s = seg("我家有成千上萬的貓,真是太多了呀。"); for(var i in s) WScript.Echo(s[i].cword+s[i].cpos); JScript 程式設計與應用:用於單機的 WSH 環境
// googleQuery.js -- ask Google how many pages mention a phrase and echo the
// hit count reported on the result page. Runs under Windows Script Host.

var query = "孤掌難鳴";

// Percent-encode the phrase so that characters such as "&", "#", "+" or
// spaces cannot corrupt the query string.
var url = "http://www.google.com.tw/search?hl=zh-TW&q=" + encodeURIComponent(query) + "&meta=";

var objHttp = new ActiveXObject("Microsoft.XMLHTTP");
objHttp.open("GET", url, false, "");	// third argument false: send() blocks until the reply arrives
objHttp.send();
var content = objHttp.responseText;
// Debugging aid: uncomment to dump the raw page.
//WScript.Echo(content);

// The count sits between <b> tags inside the phrase matched below.
var re = new RegExp("約有<b>(.*?)</b>項符合", "");
var found = content.match(re);

// Take the capture from the match result itself. The static RegExp.$1 is
// only updated by a successful match, so it cannot signal "not found".
var count = found ? found[1] : "";
WScript.Echo(count);
在上述範例中,我們可以查詢「孤掌難鳴」此成語的出現次數。
此外,我們也可以模仿 Google Fight,找出每一個中文詞的詞頻:
// Example usage: cscript googleFight.js 三隻小豬 一窮二白 鶼鰈情深 亡鈇意鄰
// Roger Jang, 20081215, tested under Vista

/**
 * Ask Google how many results it reports for a term.
 *
 * Performs a synchronous HTTP GET through the Microsoft.XMLHTTP ActiveX
 * object and extracts the text between <b>...</b> in the result-count phrase.
 *
 * @param {string} term  Search term; it is percent-encoded before being
 *                       placed in the query string.
 * @return {string} The count text as it appears in the page, or "" when the
 *                  result-count phrase is not present in the response.
 */
function googleQuery(term) {
	// WScript.Echo("Querying Google about " + term + "...");
	var url = "http://www.google.com.tw/search?hl=zh-TW&q=" + encodeURIComponent(term) + "&meta=";
	var objHttp = new ActiveXObject("Microsoft.XMLHTTP");
	objHttp.open("GET", url, false, "");	// false: send() blocks until the reply arrives
	objHttp.send();
	var content = objHttp.responseText;	// declared locally; was an implicit global

	var re = new RegExp("約有<b>(.*?)</b>項符合", "");
	var found = content.match(re);

	// Use the capture of THIS match. The static RegExp.$1 keeps the value of
	// the last successful match, so when a term's page did not match it would
	// silently report the previous term's count.
	return found ? found[1] : "";
}

// Command-line driver: one query per argument.
var args = WScript.Arguments;
if (args.length === 0) {
	WScript.Echo("Usage: " + WScript.ScriptName + " term1 term2 term3 ...");
	WScript.Quit();
}
for (var i = 0; i < args.length; i++) {
	var term = args(i);
	WScript.Echo(term + " ===> " + googleQuery(term));
}
欲測試上述範例,可以在 DOS 視窗下達命令如下: cscript googleFight.js 三隻小豬 一窮二白 鶼鰈情深 亡鈇意鄰
此外,我們也可以借用中研院的 web service 來進行中文的斷詞與詞性分析:
/**
 * Segment a sentence into tagged tokens using two remote pages.
 *
 * First requests text.cgi with the sentence and reads a serial number (sn)
 * from a "pool/<sn>.html" reference in the reply. Then fetches the
 * en.php?sn=<sn> page and parses every " text(TAG)" occurrence in it.
 *
 * @param {string} sentence  Sentence to analyse.
 * @return {Array} Array of objects {cword, cpos}: cword is the text before
 *                 the parenthesis, cpos the tag inside it. Empty when the
 *                 second page contains no token.
 * @throws {Error} When the first reply contains no "pool/<sn>.html" reference.
 */
function seg(sentence) {
	// get sn
	// NOTE(review): the sentence is sent unescaped, as before. The encoding
	// text.cgi expects is not visible here -- confirm before percent-encoding.
	var url = "http://mt.iis.sinica.edu.tw/cgi-bin/text.cgi?query=" + sentence;
	var http = WScript.CreateObject("Microsoft.XMLHTTP");
	http.open("GET", url, false);
	http.send();
	var html = http.responseText;

	var snMatch = /pool\/(\d+)\.html/.exec(html);
	if (!snMatch) {
		// Previously this surfaced as an opaque null-dereference error.
		throw new Error("seg: no pool/<sn>.html reference found in the text.cgi response");
	}
	var sn = snMatch[1];

	// use this page http://mt.iis.sinica.edu.tw/uwextract/pool/{sn}.tag.txt
	// convert encoding by php
	url = "http://exa.zibox.cc/~zi/mt/sinica/en.php?sn=" + sn;
	http.open("GET", url, false);
	http.send();
	html = http.responseText;

	// parse tokens
	var rew = / ([^(]+)\((\w+)\)/g;	// global: collects every token in the page
	var one = / ([^(]+)\((\w+)\)/;	// non-global twin: splits a single token
	var ws = html.match(rew) || [];	// match() yields null when nothing matches
	var res = [];
	for (var i = 0; i < ws.length; ++i) {
		var parts = one.exec(ws[i]);
		res.push({ cword: parts[1], cpos: parts[2] });
	}
	return res;
}

// Demo: segment a sample sentence and echo each token followed by its tag.
var s = seg("我家有成千上萬的貓,真是太多了呀。");
// Index loop instead of for...in, so only the array elements are visited.
for (var i = 0; i < s.length; i++) {
	WScript.Echo(s[i].cword + s[i].cpos);
}