Yahooの日本語形態素解析Webサービスのテスト(2)

前回のを改良してみた。
入力された文章から名詞を抽出して、タグクラウドで表示する。
デモはここ
出力の部分はAjaxで取得して、ページ内に書き換えて表示している。
タグクラウドの作成には、http://yatsu.info/articles/2005/08/05/ruby%E3%81%A7tagcloud-tagcloud-rubyを使わせてもらった。
以下はソース。

yahooMAS.html
<!DOCTYPE html>
<html lang="ja">
  <head>
    <meta http-equiv="content-type" content="text/html;charset=utf-8">
    <title>YahooMAS</title>
    <!-- prototype.js is loaded first because yahooMAS.js calls $, $F and Ajax.Request -->
    <script type="text/javascript" src="lib/prototype.js"></script>
    <script type="text/javascript" src="yahooMAS.js"></script>
  </head>
  <body>
    <h1>YahooMAS</h1>
    <!-- onsubmit hands the text to parse() and cancels the normal form submission -->
    <form method="get" id="form" onsubmit="parse(); return false;">
      <!-- the closing tag stays on the same line so the default text has no trailing newline or indentation -->
      <textarea id="sentence" rows="5" cols="80">ここに書き込んで!</textarea><br />
      <input type="submit" value="形態素解析"><br />
    </form>
    <!-- showResult() in yahooMAS.js writes the returned markup into this element -->
    <div id="result"></div>
  </body>
</html>
yahooMAS.js
/**
 * Read the text from the #sentence field and request yahooMAS.rb with it.
 * The reply is passed to showResult via the onComplete callback.
 */
function parse(){
    // Percent-encode the text so that '&', '=', '#', '+', '%', whitespace and
    // non-ASCII characters all stay inside the single "sentence" parameter
    // instead of being read as query-string syntax.
    var sentence = encodeURIComponent($F('sentence'));

    var url = 'yahooMAS.rb';
    var pars = 'sentence=' + sentence;

    new Ajax.Request(
        url,
        {
            method: 'get',
            parameters: pars,
            onComplete: showResult
        });
}

/**
 * Completion callback for the request made in parse(): replaces the contents
 * of the #result element with the response body.
 */
function showResult(originalRequest){
    var markup = originalRequest.responseText;
    var resultBox = $('result');
    resultBox.innerHTML = markup;
    // Disabled: would clear the input field after each request.
    //$('sentence').value = '';
}
yahooMAS.rb
#!/usr/local/bin/ruby
require 'open-uri'
require 'cgi'
require 'kconv'
require 'rexml/document'
require 'tagcloud'
# CGI entry point: reads the "sentence" parameter, asks the Yahoo morphological
# analysis service for its words, and prints a tag cloud of them as an HTML
# fragment.

# The CGI header goes out first; everything printed afterwards is the body.
print "Content-Type: text/html\n\n"

cgi = CGI.new
endpoint = 'http://api.jlp.yahoo.co.jp/MAService/V1/parse'
appid = 'あなたのUSERID'
results = 'uniq'
filter = '9' # nouns
sentence = cgi['sentence'].toutf8

# Nothing to analyse: finish with an empty body rather than sending an empty
# sentence to the remote service.
exit if sentence.strip.empty?

# Escape every query value, not only the sentence, so that no value can break
# the URL (the placeholder application id above is not ASCII, for instance).
query = [
  ['appid', appid],
  ['results', results],
  ['filter', filter],
  ['sentence', sentence]
].map { |key, value| "#{key}=#{CGI.escape(value)}" }.join('&')
url = "#{endpoint}?#{query}"

# Kernel#open stopped opening URLs in Ruby 3.0. Use URI.open where open-uri
# provides it and fall back to Kernel#open on interpreters that predate it.
fetch = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
doc = fetch.call(url) { |xml| REXML::Document.new(xml) }

surfaces = REXML::XPath.match(doc, '//surface')
counts = REXML::XPath.match(doc, '//count')

# Debug aid: uncomment to print a link to the raw XML response.
#puts %Q(<a href="#{url}">XMLへのリンク</a><br /><br />)
cloud = TagCloud.new
# The n-th <surface> is paired with the n-th <count>; a surface without a
# matching count is skipped instead of raising on nil.
surfaces.zip(counts).each do |surface, count|
  next unless count

  # The surface text comes from user input and TagCloud inserts tags into the
  # markup as-is, so it is HTML-escaped here.
  cloud.add(CGI.escapeHTML(surface.text.to_s), '', count.text.to_i)
end
print cloud.html_and_css
tagcloud.rb

http://yatsu.info/articles/2005/08/05/ruby%E3%81%A7tagcloud-tagcloud-ruby

# Author: Masaki Yatsu <yatsu@yatsu.info>

# Renders a set of tags as an HTML "tag cloud": one <span> per tag whose CSS
# class (tagcloud0 .. tagcloud24) grows with the square root of the tag's count.
#
# Tags and URLs are interpolated into the markup exactly as given, so callers
# must pass values that are already safe to embed in HTML.
class TagCloud
  # Highest level; levels run from 0 to MAX_LEVEL inclusive.
  MAX_LEVEL = 24
  # Font size in px of level 0; each further level adds one pixel.
  BASE_FONT_SIZE = 12

  def initialize
    @counts = {}
    @urls = {}
  end

  # Registers a tag. Adding the same tag again replaces its url and count.
  #
  # tag   - text shown for the tag (also used as the key)
  # url   - value placed in the link's href attribute
  # count - non-negative number that determines the tag's level
  def add(tag, url, count)
    @counts[tag] = count
    @urls[tag] = url
  end

  # Returns the style rules for every level as a String: a font size and an
  # undecorated link per level.
  def css
    (0..MAX_LEVEL).map do |level|
      "span.tagcloud#{level} {font-size: #{BASE_FONT_SIZE + level}px;}\n" \
        "span.tagcloud#{level} a {text-decoration: none;}\n"
    end.join
  end

  # Returns the cloud markup as a String, tags in ascending tag order.
  #
  # limit - when given, only the +limit+ tags with the highest counts are
  #         rendered (ties are broken by tag order); zero or a negative value
  #         renders nothing. nil renders every tag.
  def html(limit = nil)
    tags = ranked_tags(limit)
    return '' if tags.empty?

    min = Math.sqrt(@counts[tags.last])
    max = Math.sqrt(@counts[tags.first])
    spread = max - min

    tags.sort.map do |tag|
      level =
        if spread.zero?
          # A single tag, or all tags sharing one count: use the top level.
          MAX_LEVEL
        else
          ((Math.sqrt(@counts[tag]) - min) * (MAX_LEVEL / spread)).to_i
        end
      %{<span class="tagcloud#{level}"><a href="#{@urls[tag]}">#{tag}</a></span>\n}
    end.join
  end

  # Returns a <style> element with the rules from #css followed by the markup
  # from #html(limit).
  def html_and_css(limit = nil)
    "<style>\n#{css}</style>\n#{html(limit)}"
  end

  private

  # Tags ordered from highest to lowest count, ties in ascending tag order so
  # that the selection made by +limit+ is deterministic.
  def ranked_tags(limit)
    ranked = @counts.sort_by { |tag, count| [-count, tag] }.map { |tag, _count| tag }
    return ranked unless limit

    # first(n) yields an empty list for 0, unlike the range [0..n-1], which
    # turns into [0..-1] and selects everything.
    ranked.first([limit, 0].max)
  end
end