Yahooの日本語形態素解析Webサービスのテスト(2)
前回のを改良してみた。
入力された文章から名詞を抽出して、タグクラウドで表示する。
デモはここ。
出力の部分はAjax。
タグクラウドの作成には、Masaki Yatsu氏のtagcloud-ruby(http://yatsu.info/articles/2005/08/05/ruby%E3%81%A7tagcloud-tagcloud-ruby )を使わせてもらった。
以下はソース。
yahooMAS.html
<html>
  <head>
    <!-- Page for the YahooMAS demo: the form below hands its text to parse() in yahooMAS.js. -->
    <meta http-equiv="content-type" content="text/html;charset=utf-8">
    <title>YahooMAS</title>
    <!-- prototype.js must load first: yahooMAS.js uses its $F(), $() and Ajax.Request. -->
    <script type="text/javascript" src="lib/prototype.js"></script>
    <script type="text/javascript" src="yahooMAS.js"></script>
  </head>
  <body>
    <h1>YahooMAS</h1>
    <!-- "return false" cancels the normal form submission; parse() issues the request instead. -->
    <form method="get" id="form" onsubmit="parse(); return false;">
      <textarea id="sentence" rows="5" cols="80">ここに書き込んで! </textarea><br />
      <input type="submit" value="形態素解析"><br />
    </form>
    <!-- showResult() in yahooMAS.js writes the server response into this element. -->
    <div id="result"></div>
  </body>
</html>
yahooMAS.js
/**
 * Reads the text of the "sentence" form field and requests yahooMAS.rb with
 * it as the `sentence` query parameter. The response is handed to showResult.
 *
 * Relies on Prototype's $F() and Ajax.Request being loaded.
 */
function parse() {
  var sentence = $F('sentence');

  // Percent-encode the value so that characters with a meaning inside a query
  // string ('&', '=', '#', '+', '%') and non-ASCII text arrive as ONE
  // parameter value instead of truncating or splitting it.
  var pars = 'sentence=' + encodeURIComponent(sentence);

  new Ajax.Request('yahooMAS.rb', {
    method: 'get',
    parameters: pars,
    onComplete: showResult
  });
}

/**
 * Ajax completion callback: shows the response body inside the "result"
 * element.
 *
 * The body is inserted as markup (innerHTML), so the page trusts the server
 * to return safe HTML.
 *
 * @param {Object} originalRequest - transport object passed by Ajax.Request;
 *   only its `responseText` property is read here.
 */
function showResult(originalRequest) {
  $('result').innerHTML = originalRequest.responseText;
  //$('sentence').value = '';
}
yahooMAS.rb
#!/usr/local/bin/ruby
# CGI script: takes the `sentence` request parameter, asks the Yahoo! Japan
# morphological-analysis web service for its nouns (results=uniq, filter=9),
# and prints them as an HTML tag cloud sized by the reported counts.
#
# NOTE(review): uses URI.open, which needs Ruby 2.5 or newer.
require 'open-uri'
require 'cgi'
require 'kconv'
require 'rexml/document'
require 'tagcloud'

print "Content-Type: text/html\n\n"

cgi = CGI.new

url = 'http://api.jlp.yahoo.co.jp/MAService/V1/parse'
appid = 'あなたのUSERID'
results = 'uniq'
filter = '9' # nouns

sentence = cgi['sentence'].toutf8

# Nothing to analyse: leave the body empty rather than calling the remote API.
exit if sentence.strip.empty?

request_url = "#{url}?appid=#{appid}&results=#{results}&filter=#{filter}" \
              "&sentence=#{CGI.escape(sentence)}"

# URI.open instead of a bare open(): the Kernel#open URL support that open-uri
# used to provide was removed in Ruby 3.0.
doc = URI.open(request_url) { |xml| REXML::Document.new(xml) }

counts = REXML::XPath.match(doc, "//count")
surfaces = REXML::XPath.match(doc, "//surface")

#puts %Q(<a href="#{request_url}">XMLへのリンク</a><br /><br />)

cloud = TagCloud.new
# Pair each <count> with the <surface> at the same position. A missing or
# empty surface is skipped instead of raising on nil.
counts.zip(surfaces).each do |count, surface|
  word = surface && surface.text
  next if word.nil? || word.empty?

  cloud.add(word, "", count.text.to_i)
end

print cloud.html_and_css
tagcloud.rb
http://yatsu.info/articles/2005/08/05/ruby%E3%81%A7tagcloud-tagcloud-ruby
# Author: Masaki Yatsu <yatsu@yatsu.info>
require 'erb'

# Collects tags with a link URL and a count, and renders them as an HTML
# "tag cloud": one <span class="tagcloudN"> per tag, where N (0..24) grows
# with the square root of the tag's count. #css supplies the matching
# font-size rules (12px for level 0 up to 36px for level 24).
class TagCloud
  # Highest CSS level; the most frequent tag is scaled towards this value.
  MAX_LEVEL = 24
  # Font size in pixels of level 0; each level adds one pixel.
  BASE_FONT_PX = 12

  def initialize
    @counts = {}
    @urls = {}
  end

  # Registers a tag, replacing any earlier entry with the same name.
  #
  # tag   - tag text (plain text; escaped when rendered)
  # url   - link target for the tag (escaped when rendered)
  # count - non-negative number deciding the tag's size
  def add(tag, url, count)
    @counts[tag] = count
    @urls[tag] = url
  end

  # Returns the CSS rules for every level from 0 to MAX_LEVEL.
  def css
    (0..MAX_LEVEL).map do |level|
      "span.tagcloud#{level} {font-size: #{BASE_FONT_PX + level}px;}\n" \
        "span.tagcloud#{level} a {text-decoration: none;}\n"
    end.join
  end

  # Returns the cloud markup, tags in alphabetical order.
  #
  # limit - when given, only the `limit` tags with the highest counts are
  #         rendered; a limit of zero (or less) renders nothing.
  def html(limit = nil)
    tags = @counts.sort_by { |_tag, count| count }.reverse.map { |tag, _count| tag }
    # first(n) rather than tags[0..limit-1]: with limit == 0 the range form
    # becomes 0..-1 and would select every tag.
    tags = tags.first([limit, 0].max) if limit

    return "" if tags.empty?
    return tag_span(tags.first, MAX_LEVEL) if tags.size == 1

    min = Math.sqrt(@counts[tags.last])
    max = Math.sqrt(@counts[tags.first])

    if (max - min).zero?
      # special case: all tags have the same count, so put them all at MAX_LEVEL
      min -= MAX_LEVEL
      factor = 1
    else
      factor = MAX_LEVEL / (max - min)
    end

    tags.sort.map do |tag|
      level = ((Math.sqrt(@counts[tag]) - min) * factor).to_i
      tag_span(tag, level)
    end.join
  end

  # Returns a <style> element with #css followed by #html(limit).
  def html_and_css(limit = nil)
    "<style>\n#{css}</style>\n#{html(limit)}"
  end

  private

  # Renders one tag as a linked span of the given level. Tag text and URL are
  # HTML-escaped so that characters such as <, & and " cannot break the markup.
  def tag_span(tag, level)
    href = ERB::Util.html_escape(@urls[tag])
    label = ERB::Util.html_escape(tag)
    %(<span class="tagcloud#{level}"><a href="#{href}">#{label}</a></span>\n)
  end
end