# Rebuilds the weblog_development MySQL database from scratch and imports
# Atom files into the entries/authors tables through the Rails models.
#
# The files to import are taken from ARGV; when no arguments are given,
# every file matching /home/rubys/atom/20??.atom is imported.
ARGV.push(*Dir.glob('/home/rubys/atom/20??.atom')) if ARGV.empty?

# Pipes +stream+ (SQL text) to the standard input of a `mysql` client
# process started with the command-line options +opts+.
def mysql(opts, stream)
  IO.popen("mysql #{opts}", 'w') { |io| io.puts stream }
end

# Recreate the database and grant access to the user running this script.
mysql "-u root", <<-END
  drop database if exists weblog_development;
  create database weblog_development;
  grant all on weblog_development.* to #{`id -un`.strip}@localhost;
END

# Recreate the tables.
mysql "weblog_development", <<-END
  drop table if exists entries;
  create table entries (
    id int not null auto_increment,
    atomid varchar(45),
    slug varchar(80),
    title varchar(100),
    updated datetime,
    summary text,
    content text,
    method char(10),
    author_id int,
    parent_id int,
    primary key(id)
  );

  drop table if exists authors;
  create table authors (
    id int not null auto_increment,
    name varchar(100),
    ipaddr varchar(100),
    email varchar(100),
    uri varchar(100),
    primary key(id)
  );
  alter table authors add index(name);
END

# Explicit "./" prefix: the current directory is not on $LOAD_PATH in
# Ruby 1.9.2 and later, so the bare 'config/environment' would not resolve.
require './config/environment'
require 'rexml/document'

class REXML::Element
  # Extracts the payload of an Atom text construct or atom:content element
  # according to its "type" attribute.
  #
  # Returns nil when the element has a "src" attribute (out-of-line content),
  # the serialized child nodes for xhtml/text/XML types, the stripped text for
  # html, and the Base64-decoded text for any other type.
  #
  # Raises RuntimeError when type is "xhtml" but the first child element is
  # not a div.
  def atomtext
    return nil if attribute("src")

    type = attribute("type")
    case type && type.value
    when "xhtml"
      div = elements[1]
      raise "missing xhtml:div" if div.nil? || div.name != "div"
      # Explicit join: Array#to_s only concatenates on Ruby 1.8; on 1.9+ it
      # returns the inspect form instead of the markup.
      div.to_a.map { |node| node.to_s }.join.strip
    when "html"
      # text is nil for an empty element
      text.to_s.strip
    when "text", nil, /^text\//i, /\+xml$/i, /\/xml$/i
      to_a.map { |node| node.to_s }.join.strip
    else
      # Loaded only when content of some other type is encountered.
      require 'base64'
      Base64.decode64(text.to_s.gsub(/\s/, ''))
    end
  end

  # Builds a hash from the child elements (element name => text), normalizes
  # it, and hands it to Author.find_or_create.
  #
  # Returns a two-element array: the result of Author.find_or_create and the
  # text of the "method" child element (nil when there is none).
  def atomauthor
    params = {}
    elements.each { |e| params[e.name] = e.text }

    # adjust for messy input data
    params['name'] ||= params['title']
    if params['url'] =~ /mailto:(.*)/
      # NOTE(review): the mail address is stored under 'uri', not 'email' --
      # confirm this is intended.
      params['uri'] = $1
    else
      params['uri'] ||= params['url']
    end

    return Author.find_or_create(params), params['method']
  end
end

ns = {"atom" => "http://www.w3.org/2005/Atom"}

# Process files in order of the first number that precedes a "." in the name
# (right-justified to 5 characters so the sort is numeric), falling back to
# the name itself.
ordered_files = ARGV.sort_by { |file| file =~ /(\d+)\./ ? $1.rjust(5) : file }

ordered_files.each do |filename|
  puts filename

  # File.read rather than Kernel#open: the latter would run a command for an
  # argument that starts with "|".
  doc = REXML::Document.new(File.read(filename))
  feed = doc.root

  # The first atom:entry in a file becomes the parent; every later entry in
  # the same file is created as one of its children.
  parent = nil

  feed.elements.each do |entry|
    next unless entry.namespace == ns['atom'] && entry.name == 'entry'

    post = parent ? parent.children.create : Entry.new

    entry.elements.each do |child|
      next unless child.namespace == ns['atom']

      case child.name
      when 'id'      then post.atomid  = child.text
      when 'title'   then post.title   = child.atomtext
      when 'summary' then post.summary = child.atomtext
      when 'content' then post.content = child.atomtext
      when 'updated' then post.updated = child.text
      when 'author'  then post.author, post.method = child.atomauthor
      end
    end

    # Only the parent entry gets a slug. The title may be nil (no atom:title
    # element, or atomtext returned nil), hence to_s.
    if !parent && !post.title.to_s.empty?
      if filename !~ /(^|\/)\d+\.atom$/ && filename =~ /(\w+)\.atom$/
        # Non-numeric file name: derive the slug from the file name.
        post.slug = $1.gsub(/_/, "-")
      else
        # Otherwise derive it from the title: drop apostrophes, turn runs of
        # non-word characters into single dashes.
        post.slug = post.title.gsub(/'/, '').gsub(/\W/, ' ').strip.gsub(/\s+/, '-')
      end
    end

    post.save!
    parent ||= post
  end
end

# Add the full-text index once all rows have been inserted.
mysql "weblog_development", <<-END
  alter table entries add fulltext(title,summary,content);
END