#!/usr/bin/env python
"""Scrape an HTML table of feeds and print it as a list of OPML outlines.

The script runs in two modes:

* CGI: on a GET request with a ``url`` query parameter the page at that URL
  is scraped and returned as ``application/xml``; without the parameter an
  HTML form asking for the URL is served instead.
* Command line: every argument is treated as a page to scrape and the
  outlines are written to standard output.

For each ``<tr>`` of a scraped page, the first ``<td>`` supplies the entry
text (and the ``url`` attribute when the cell contains a link) and the third
``<td>`` supplies the link emitted as ``xmlUrl``.

NOTE(review): in the version of this file under review the markup inside the
string literals was missing (the outline template had no ``%s`` placeholders,
so formatting it with three values raised ``TypeError``, and the header,
footer and form literals were blank).  ``FORM_PAGE``, ``OPML_HEADER``,
``OPML_FOOTER`` and ``OUTLINE_TEMPLATE`` below are a minimal reconstruction
-- confirm them against the intended output.
"""
import os
import sys
import urllib
from xml.sax.saxutils import escape

# Page served when the CGI request carries no ``url`` parameter.  The form
# must submit a field named ``url`` via GET, because that is what main() reads.
FORM_PAGE = (
    '<html><head><title>Scrape SYO</title></head>\n'
    '<body><form method="get" action="">\n'
    'URL <input type="text" name="url" />\n'
    '<input type="submit" />\n'
    '</form></body></html>\n'
)

OPML_HEADER = (
    '<?xml version="1.0" encoding="utf-8"?>\n'
    '<opml version="1.1">\n'
    '<head />\n'
    '<body>\n'
)
OPML_FOOTER = '</body>\n</opml>\n'

# First slot is either empty or a complete ``url="..." `` attribute (with its
# trailing space); the other two are attribute values.
OUTLINE_TEMPLATE = '<outline %stext="%s" xmlUrl="%s" />'


def _attr(value):
    """Escape *value* for use inside a double-quoted XML attribute."""
    # escape() alone only handles & < >; a bare double quote would end the
    # attribute early and produce malformed XML.
    return escape(value, {'"': '&quot;'})


def format_outline(text, xml_url, link=None):
    """Return one ``<outline>`` element as a string.

    text    -- display text of the entry; ``None`` is rendered as "".
    xml_url -- value of the ``xmlUrl`` attribute.
    link    -- optional value of the ``url`` attribute; omitted when falsy.
    """
    link_attr = 'url="%s" ' % _attr(link) if link else ''
    return OUTLINE_TEMPLATE % (link_attr, _attr(text or ''), _attr(xml_url))


def is_http_url(url):
    """Return True when *url* uses the http or https scheme."""
    return url.lower().startswith(('http://', 'https://'))


def _node_text(node):
    """Return the text of a parsed node, joining nested text pieces.

    ``node.string`` is None when the node has more than one child (for
    example text wrapped in extra tags), so fall back to concatenating every
    text node below it instead of passing None on to the XML escaper.
    """
    if node.string is not None:
        return node.string
    return ''.join(node.findAll(text=True))


def iter_outlines(markup):
    """Yield an ``<outline>`` string for every usable table row in *markup*.

    Rows with fewer than three ``<td>`` cells, or whose third cell holds no
    link with an ``href``, are skipped rather than aborting the whole scrape.
    """
    # Imported lazily: only the scrape path needs the third-party parser, and
    # the formatting helpers above stay importable without it.
    import BeautifulSoup

    soup = BeautifulSoup.BeautifulStoneSoup(markup)
    for row in soup.findAll('tr'):
        cells = row.findAll('td')
        if len(cells) < 3:
            continue
        feed_anchor = cells[2].find('a')
        xml_url = feed_anchor.get('href') if feed_anchor is not None else None
        if not xml_url:
            continue
        title_anchor = cells[0].find('a')
        if title_anchor is not None:
            yield format_outline(_node_text(title_anchor), xml_url,
                                 title_anchor.get('href'))
        else:
            yield format_outline(_node_text(cells[0]), xml_url)


def fetch_page(url):
    """Return the raw body of *url*, always closing the connection."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _emit(text):
    """Write *text* to standard output encoded as UTF-8.

    The XML response declares ``charset=utf-8``; writing unicode text to a
    non-terminal stdout without encoding it would fall back to ASCII and fail
    on the first non-ASCII title.
    """
    data = text if isinstance(text, bytes) else text.encode('utf-8')
    # A text-mode stdout exposes its byte stream as ``.buffer``; otherwise
    # stdout itself accepts the encoded bytes.
    getattr(sys.stdout, 'buffer', sys.stdout).write(data)


def main():
    """Serve the CGI request, or scrape the URLs given on the command line."""
    # Only this entry point needs the form parser.
    import cgi

    urls = sys.argv[1:]
    if os.environ.get('REQUEST_METHOD', '').lower() == 'get':
        # getfirst() yields a single string (or None) even when the parameter
        # is repeated, whereas getvalue() would return a list in that case.
        requested = cgi.FieldStorage().getfirst('url')
        if not requested:
            _emit('Content-type: text/html\r\n\r\n')
            _emit(FORM_PAGE)
            return
        # The parameter is untrusted and urllib.urlopen() also opens local
        # paths and file: URLs, so restrict CGI requests to web URLs.  This
        # does not stop requests to hosts on the server's internal network.
        if not is_http_url(requested):
            _emit('Status: 400 Bad Request\r\n'
                  'Content-type: text/plain\r\n\r\n'
                  'Only http:// and https:// URLs can be scraped.\n')
            return
        _emit('Content-type: application/xml; charset=utf-8\r\n\r\n')
        urls = [requested]

    _emit(OPML_HEADER)
    for page_url in urls:
        for outline in iter_outlines(fetch_page(page_url)):
            _emit(outline + '\n')
    _emit(OPML_FOOTER)


if __name__ == '__main__':
    main()