#---------------------------------------------------------------------
# HTMLScanner
#   URL: http://scl.littlestar.jp/revwiki/view.cgi?HTMLScanner
#   Author: Dice
#   Required library: htmlsplit.rb
#                   (http://www.moonwolf.com/ruby/)
#---------------------------------------------------------------------

require 'htmlsplit'

module HTMLScanner
	# Build a node tree from an HTML string
	# Parse an HTML string into a tree of HTMLScanner nodes.
	#
	# The markup is tokenized with HTMLSplit; the token list is then handed to
	# HTMLScanner._search_children_seq, which attaches the nodes to a new Root.
	#
	# html:: HTML source, passed unchanged to HTMLSplit.new
	#
	# Returns the HTMLScanner::Root that owns the parsed nodes.
	def HTMLScanner.get_tree(html)
		tree = HTMLScanner::Root.new
		tokens = ::HTMLSplit.new(html).document
		HTMLScanner._search_children_seq(tree, tokens)
		tree
	end

	# Extract the text contained in a node
	# Collect the text found beneath +node+.
	#
	# Children are visited in order: the content of each Text child is
	# appended, each Element child is descended into recursively, and every
	# other kind of child is skipped.
	#
	# node:: any object responding to #children
	#
	# Returns a new String (empty when no text is found).
	def HTMLScanner.get_text(node)
		node.children.inject("") do |buffer, child|
			case child
			when Element then buffer << HTMLScanner.get_text(child)
			when Text    then buffer << child.content
			else buffer
			end
		end
	end


	# Abstract class representing a node of the tree
	# Base class for every node of the tree.
	#
	# A node knows its parent and keeps an ordered list of children.
	# Subclasses are expected to provide #to_html, which #to_s delegates to.
	class Node
		attr_accessor :children, :parent

		# Create a node and, when +parent+ is given, append it to the parent's
		# children.
		#
		# parent::      node to attach to, or nil for a detached node
		# method_name:: when given together with +parent+, a singleton reader
		#               with this name is defined on the parent and returns
		#               this node. A later child registered under the same
		#               name replaces the reader, so the reader always yields
		#               the most recently added child of that name.
		def initialize(parent = nil, method_name = nil)
			@parent = parent
			@children = []
			return unless @parent

			@parent.children << self
			return unless method_name

			# The reader is defined without evaluating any source text. The name
			# may be arbitrary (for instance a tag name such as "my-tag" or
			# "o:p"), so it must neither raise a SyntaxError nor be executed as
			# code. Names that are not valid Ruby identifiers stay reachable
			# through public_send.
			# NOTE: a name equal to an existing method of the parent (such as
			# "children") shadows that method on the parent object.
			child = self
			@parent.define_singleton_method(method_name.to_s) { child }
		end

		# Return every descendant of this node as a flat Array in document
		# order: each child is immediately followed by its own descendants.
		# The node itself is not included; a leaf returns an empty Array.
		def flatten
			@children.flat_map { |child| [child, *child.flatten] }
		end

		# Same as #to_html.
		def to_s
			to_html
		end

		# Debug representation showing the class name and the children.
		def inspect
			sprintf("#<%s @children=%s>", self.class.to_s, @children.inspect)
		end
	end

	# HTML root
	# Top of a parsed document; it has no markup of its own.
	class Root < Node
		# Serialize the document by concatenating the HTML of every child,
		# in order. Returns an empty String when there are no children.
		def to_html
			@children.inject("") { |markup, child| markup << child.to_html }
		end
	end

	# Element (tag)
	# An element: a tag name, a Hash of attributes and child nodes.
	class Element < Node
		attr_accessor :name, :attributes

		# Tag names that are serialized without an end tag when they have no
		# children.
		EMPTY_ELEMENTS = %w(area base basefont bgsound br col frame hr img input isindex
		                    keygen link meta nextid param spacer wbr).freeze

		# parent::      node to attach to (see Node#initialize)
		# method_name:: name of the reader defined on the parent, or nil
		# name::        tag name, used verbatim when serializing
		# attributes::  Hash of attribute name => value
		def initialize(parent, method_name, name, attributes = {})
			super(parent, method_name)
			@name = name
			@attributes = attributes
		end

		# Serialize this element, its attributes and all of its children.
		#
		# Attribute values are written inside double quotes, so any literal
		# double quote in a value is emitted as &quot; to keep the markup
		# well-formed. An element without children is closed with an explicit
		# end tag unless its name (compared case-insensitively) is listed in
		# EMPTY_ELEMENTS.
		def to_html
			html = ""

			html << "<" << @name
			# Read the instance variable rather than the reader: a child node
			# registered under the name "attributes" would shadow the reader.
			@attributes.each_pair do |attr_name, attr_value|
				escaped = attr_value.to_s.gsub('"', "&quot;")
				html << %Q| #{attr_name}="#{escaped}"|
			end
			html << ">"

			# Childless void elements get no end tag.
			return html if @children.empty? && EMPTY_ELEMENTS.include?(@name.downcase)

			@children.each { |child| html << child.to_html }
			html << "</#{@name}>"
		end

		# Debug representation showing the children, tag name and attributes.
		def inspect
			sprintf("#<%s @children=%s, @name=%s, @attributes=%s>",
			        self.class.to_s, @children.inspect, @name.inspect, @attributes.inspect)
		end
	end

	# eLXgSẴm[h̃X[p[NX
	# Common base for nodes that carry a piece of content in addition to the
	# usual parent/children links.
	class NodeWithContent < Node
		attr_accessor :content

		# parent::      node to attach to, or nil (see Node#initialize)
		# method_name:: name of the reader defined on the parent, or nil
		# content::     the content held by this node (defaults to "")
		def initialize(parent = nil, method_name = nil, content = "")
			super(parent, method_name)
			@content = content
		end

		# Debug representation showing the class name, children and content.
		def inspect
			"#<#{self.class} @children=#{@children.inspect} @content=#{@content.inspect}>"
		end
	end


	# Declaration (<!...>)
	# A markup declaration, serialized as <!...>.
	class Declaration < NodeWithContent
		# Wrap the stored content in "<!" and ">".
		#
		# The content lives in @content (set by NodeWithContent#initialize).
		# Reading @text, which is never assigned, always produced "<!>".
		def to_html
			"<!#{@content}>"
		end
	end

	# Text
	# A run of character data.
	class Text < NodeWithContent
		# The stored content is emitted exactly as it is, with no wrapping.
		def to_html
			@content
		end
	end

	# Comment
	# A comment node.
	class Comment < NodeWithContent
		# Wrap the stored content in comment delimiters, with one space added
		# on each side of the content.
		def to_html
			"<!-- #{@content} -->"
		end
	end



	# Consume +tokens+ from the front and attach one node to +parent_node+
	# for each token that is recognized.
	#
	# The token classes are looked up at top level (::CharacterData,
	# ::StartTag, ...); presumably they are the ones defined by htmlsplit.
	#
	# * character data  -> Text, readable on the parent as #text
	# * empty element   -> Element without children, readable by tag name
	# * start tag       -> Element, readable by tag name; the tokens that
	#   follow are consumed recursively as its children
	# * end tag         -> finishes the element currently being filled
	# * comment / declaration -> Comment / Declaration, with no reader
	#
	# End tags are not matched against the name of the open element: any end
	# tag closes the innermost open one.
	#
	# parent_node:: node that receives the new children
	# tokens::      Array of tokens; it is emptied destructively with #shift
	#
	# Returns nil.
	def HTMLScanner._search_children_seq(parent_node, tokens)
		until tokens.empty? do
			token = tokens.shift

			case token
			when ::CharacterData
				Text.new(parent_node, :text, token.text)

			when ::EmptyElementTag
				Element.new(parent_node, token.name, token.name, token.attr || {})

			when ::StartTag
				element = Element.new(parent_node, token.name, token.name, token.attr || {})
				HTMLScanner._search_children_seq(element, tokens)

			when ::EndTag
				# At the top level there is no open element to close. Stopping
				# here would silently discard the remainder of the document, so
				# a stray end tag under the Root is skipped instead.
				break unless parent_node.is_a?(Root)

			when ::Comment
				Comment.new(parent_node, nil, token.text)

			when ::Declaration
				Declaration.new(parent_node, nil, token.text)
			end
		end

		nil
	end

end
