require 'planet/transmogrify'
require 'planet/sift'

module Planet

  # Parse the feed found at +source+ and return the parsed document, extended
  # with feed-parser style +feed+ and +entries+ accessors.
  #
  # source:: location passed to Kernel#open; it is also stored as the
  #          document's xml:base attribute.
  #
  # Returns the document produced by Planet::Transmogrify.parse, with
  # +feed+ set to a Feed wrapping the root element and +entries+ set to an
  # array of Entry objects, one per 'entry' child of the root.
  #
  # NOTE(review): the handle returned by open(source) is never closed
  # explicitly here; confirm whether Transmogrify.parse consumes and closes
  # it. Kernel#open also gives special meaning to some strings on older
  # Rubies, so callers should presumably only pass trusted locations.
  def Planet.harvest source
    doc = Planet::Transmogrify.parse(open(source))
    doc.attributes['xml:base'] = source

    # augment the document with feed parser attributes
    class << doc
      attr_accessor :feed, :entries
    end

    # Anchor the dynamic dictionaries
    doc.feed = Feed.new(doc.root)
    doc.entries = doc.root.elements.to_a('entry').map {|entry| Entry.new(entry)}

    doc
  end

  # A dynamic dictionary that allows attributes to be accessed via indexing
  class UserDict
    attr_accessor :node

    # Wrap +node+. A nil node is replaced by an empty REXML element so the
    # generated accessors can query it without nil checks.
    def initialize node
      @node = node || REXML::Element.new('')
    end

    # Return the result of calling the method named +index+, or nil when
    # this object does not respond to it.
    def [](index)
      respond_to?(index) ? send(index) : nil
    end

    # method generator for elements whose value is defined by its text child
    # (the accessor returns the joined text children, or nil when the child
    # element is absent)
    def UserDict.text_element *names
      names.each do |name|
        define_method name do
          element = @node.elements[name.to_s]
          element ? element.texts.map {|t| t.value}.join : nil
        end
      end
    end

    # method generator for element attribute values
    def UserDict.element_attr *names
      names.each do |name|
        define_method name do
          @node.attributes[name.to_s]
        end
      end
    end

    # method generator for relative URI attribute values
    # (a present attribute is passed through Planet.uri_norm together with
    # the node's xmlbase; an absent attribute yields nil)
    def UserDict.reluri_attr *names
      names.each do |name|
        define_method name do
          value = @node.attributes[name.to_s]
          value = Planet.uri_norm(@node.xmlbase, value) if value
          value
        end
      end
    end

    # method generator for text constructs (plus detail)
    # For each name this defines +name+ (the TextConstruct's value) and
    # +name_detail+ (the TextConstruct itself).
    def UserDict.text_construct *names
      names.each do |name|
        define_method name do
          TextConstruct.new(@node.elements[name.to_s]).value
        end

        define_method name.to_s + "_detail" do
          TextConstruct.new(@node.elements[name.to_s])
        end
      end
    end
  end

  # Accessors shared by Feed and Entry.
  class CommonElements < UserDict
    text_element :id
    alias :guid :id

    text_construct :rights
    alias :copyright :rights

    text_construct :title

    # href of the first link whose rel is 'alternate'; nil when there is no
    # such link or when looking it up raises.
    def link
      links.find {|candidate| candidate.rel == 'alternate'}.href rescue nil
    end

    # All 'link' children wrapped as Link objects.
    def links
      @node.elements.to_a('link').map {|element| Link.new(element)}
    end

    # href of the first link whose rel is 'license'; nil when there is no
    # such link or when looking it up raises.
    def license
      links.find {|candidate| candidate.rel == 'license'}.href rescue nil
    end

    # All 'category' children wrapped as Category objects.
    def tags
      @node.elements.to_a('category').map {|element| Category.new(element)}
    end

    # [scheme, term] pairs, one per tag.
    def categories
      tags.map {|tag| [tag.scheme, tag.term]}
    end

    # Term of the first tag, or nil when there are no tags.
    def category
      tags.first.term rescue nil
    end

    # All 'contributor' children wrapped as Author objects.
    def contributors
      @node.elements.to_a('contributor').map {|element| Author.new(element)}
    end

    # String form of author_detail (see Author#to_s).
    def author
      author_detail.to_s
    end

    # The 'author' child wrapped as an Author (an empty Author when absent).
    def author_detail
      Author.new(@node.elements['author'])
    end

    alias :publisher :author
    alias :publisher_detail :author_detail
  end

  # Feed-level accessors.
  class Feed < CommonElements
    text_element :icon, :logo

    text_construct :subtitle
    alias :description :subtitle
    alias :tagline :subtitle

    # Text of the 'generator' element.
    def generator
      generator_detail.name
    end

    # The 'generator' child wrapped as a Generator.
    def generator_detail
      Generator.new(@node.elements['generator'])
    end
  end

  # Entry-level accessors.
  class Entry < CommonElements
    text_construct :summary
    alias :description :summary

    # All 'content' children wrapped as TextConstruct objects.
    def content
      @node.elements.to_a('content').map {|element| TextConstruct.new(element)}
    end

    # Links whose rel is 'enclosure'.
    def enclosures
      links.select {|candidate| candidate.rel == 'enclosure'}
    end

    # href of the first link with rel 'replies' and type 'text/html'; nil
    # when there is no such link or when looking it up raises.
    def comments
      links.find { |candidate|
        candidate.rel == 'replies' && candidate.type == 'text/html'
      }.href rescue nil
    end

    # The 'source' child wrapped as a Feed.
    def source
      Feed.new(@node.elements['source'])
    end
  end

  # Wrapper for an element whose 'type' attribute selects how its content is
  # interpreted (text, html, xhtml, XML media types, or base64).
  class TextConstruct < UserDict
    require 'html5'
    require 'html5/treewalkers'
    require 'html5/serializer'
    REXML_TREEWALKER = HTML5::TreeWalkers['rexml']

    element_attr :src

    # The content as a string, decoded according to the 'type' attribute.
    def value
      case @node.attributes['type']
      when 'xhtml'
        # serialize the children of the first child element
        serialize(@node.elements[1].to_a).strip
      when 'text', nil, /^text\//i
        (@node.text || '').strip
      when 'html'
        text = @node.text.strip rescue ''
        serialize HTML5.parse_fragment(text, :encoding => 'UTF-8')
      when /\+xml$/i, /\/xml$/i
        # join (rather than Array#to_s) so the children are concatenated as
        # XML on every Ruby version; Array#to_s is inspect on Ruby >= 1.9
        @node.to_a.join.strip
      else
        # base 64; an empty element has nil text, which decodes to ''
        (@node.text || '').gsub(/\s/,'').unpack('m').first
      end
    end

    # The media type implied by the 'type' attribute: the Atom shorthands
    # are expanded, anything else is returned unchanged.
    def type
      case @node.attributes['type']
      when 'xhtml'
        'application/xhtml+xml'
      when 'text', nil
        'text/plain'
      when 'html'
        'text/html'
      else
        @node.attributes['type']
      end
    end

    # NOTE(review): url_norm is not defined in this file, while every other
    # call site here uses Planet.uri_norm(base, value) -- TODO confirm that
    # url_norm exists elsewhere, otherwise this raises NoMethodError.
    def base
      url_norm(@node.xmlbase)
    end

    private

    # DOM to string
    def serialize nodes
      nodes.map { |child|
        # resolve relative URIs
        if child.respond_to? :attributes
          # a parent without a parent of its own is given this construct's
          # xml:base unless it already has one
          if !child.parent.parent
            child.parent.attributes['xml:base'] ||= @node.xmlbase
          end
          resolve child
        end

        HTML5::XHTMLSerializer.serialize(REXML_TREEWALKER.new(child))
      }.join
    end

    # resolve relative URIs (href attributes only), recursing into children
    def resolve element
      element.attributes.each do |name,value|
        if %w(href).include? name
          element.attributes[name] = Planet.uri_norm(element.xmlbase, value)
        end
      end

      element.each_element { |child| resolve child }
    end
  end

  # Wrapper for person constructs (author, contributor).
  class Author < UserDict
    text_element :name, :email, :uri

    # The 'uri' child's text passed through Planet.uri_norm with that
    # element's xmlbase; nil when there is no 'uri' child. This replaces
    # the plain text accessor generated above.
    def uri
      value = @node.elements['uri']
      if value
        value = Planet.uri_norm(value.xmlbase, value.text)
      end
      value
    end

    # "name (email)" when an email is present, otherwise just the name.
    def to_s
      email ? "#{name} (#{email})" : "#{name}"
    end

    alias :url :uri
    alias :href :uri
  end

  # Wrapper for 'link' elements.
  class Link < UserDict
    element_attr :title, :length, :hreflang

    reluri_attr :href
    alias :url :href

    # The 'rel' attribute, defaulting to 'alternate'.
    def rel
      @node.attributes['rel'] || 'alternate'
    end

    # The 'type' attribute; when absent, 'application/atom+xml' for
    # rel='self' links and nil otherwise.
    def type
      @node.attributes['type'] ||
        (rel=='self' ? 'application/atom+xml' : nil)
    end
  end

  # Wrapper for 'category' elements.
  class Category < UserDict
    element_attr :term, :scheme, :label
  end

  # Wrapper for the 'generator' element.
  class Generator < UserDict
    element_attr :version

    reluri_attr :uri
    alias :href :uri

    # The element's text.
    def name
      @node.text
    end
  end
end