%# begin user customizations % %# mod atom-root directory % ROOTDIR = '/home/rubys/tmp/atom/test-pub' % %# add html5lib to your search path %# svn checkout http://html5lib.googlecode.com/svn/trunk/ html5lib % $:.unshift '/home/rubys/svn/html5lib/ruby/lib' % %# end user customizations % % require "rexml/document.rb"; include REXML % % # atom content model % def text element % return nil if element.attribute("src") % type = element.attribute("type") % case type && type.value % when "xhtml" % throw "missing xhtml:div" if element.elements[1].name != "div" % element.elements[1].to_a.to_s.strip % when "html" % element.text.strip % when "text", nil % element.to_a.to_s.strip % when /^text\//i, /\+xml$/i, /\/xml$/i % element.to_a.to_s.strip % else % require 'base64' % Base64.decode64(element.text.gsub(/\s/,'')) % end % end % % # html5 sanitization % require 'html5/html5parser'; require 'html5/sanitizer'; include HTML5 % def clean element % HTMLParser.parseFragment(text(element), :tokenizer=>HTMLSanitizer, :encoding=>'utf-8') % end % % # atom namespace % ns = {'atom' => 'http://www.w3.org/2005/Atom'} %
% % Dir["#{ROOTDIR}/**/*"].sort.reverse.each_with_index do |file, index| % break if index>12 % next if %w(lock. collection).include? File.basename(file) % next if File.directory? file % doc=Document.new(open(file)) rescue next %