import glob, libxml2, os, time, sys, sgmllib, urllib from xml.sax.saxutils import escape now = time.time() week = 7 * 86400 week_ago = now - week cache = os.path.join(sys.argv[1], '*') all_links = {} for name in glob.glob(cache): # ensure that this is within the past week if os.path.isdir(name): continue mtime = os.stat(name).st_mtime if mtime < week_ago: continue # parse the file doc = libxml2.parseFile(name) xp = doc.xpathNewContext() xp.xpathRegisterNs("atom", "http://www.w3.org/2005/Atom") # determine the entry entry = xp.xpathEval("//atom:link[@rel='alternate']") if not entry: continue entry = entry[0].prop("href") # determine the feed feed = xp.xpathEval("//atom:source/atom:id") if feed: feed = feed[0].content else: feed = xp.xpathEval("//atom:source/atom:link[@href and @rel='self']") if not feed: feed = xp.xpathEval("//atom:source/atom:link[@href]") if not feed: continue feed = feed[0].prop('href') # identify the unique links entry_links = [] for node in doc.xpathEval("//*[@href and not(@rel='source')]"): if node.parent.name == 'source': continue if not node.prop('href') in entry_links: entry_links.append(str(node.prop('href'))) # add the votes vote = [(1.0 - (now - mtime)**2 / week**2, str(entry), str(feed))] for link in entry_links: all_links[link] = all_links.get(link,list()) + vote # free the entry doc.freeDoc() # tally the votes weighted_links = [] for link, votes in all_links.items(): site = {} for weight, entry, feed in votes: site[feed] = min(site.get(feed,1), weight) weighted_links.append((sum(site.values()), link)) weighted_links.sort() weighted_links.reverse() # determine the title for a given url class html(sgmllib.SGMLParser): def __init__(self, url): sgmllib.SGMLParser.__init__(self) self.title = "" self.intitle = False try: self.feed(urllib.urlopen(url).read()) except: pass if not self.title: self.title = url.split('/')[-1] def start_title(self, attributes): if not self.title: self.intitle = True def end_title(self): self.intitle = False def handle_data(self, text): if self.intitle: self.title += text # output the results print "