blob: b308d44c4e41414c7f6cd19f553e8415dfc6cc46 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
import urllib
import urllib2
import urlparse
from lxml import html
def fetch(url):
    """Download the page at ``url`` and extract its title and tables.

    The title is the text of the first ``<h1>`` element, followed by
    ``' - '`` and the text of the first ``<span class="subtitle">``
    element.  The URL and the title are printed to standard output.

    Args:
        url: Address of the page, passed unchanged to ``urllib2.urlopen``.

    Returns:
        A ``(title, tables)`` tuple, where ``tables`` is the list of
        elements matched by the XPath ``//table[@cellpadding=6]``.

    Raises:
        IndexError: If the page has no ``<h1>`` or no
            ``<span class="subtitle">`` element.
        TypeError: If either element's ``.text`` is ``None`` (lxml reports
            an element without leading text that way), because ``None``
            cannot be concatenated with a string.

    Errors raised by ``urllib2.urlopen`` or by the lxml parser propagate
    unchanged.
    """
    response = urllib2.urlopen(url)
    try:
        contents = response.read()
    finally:
        # Always release the connection, even when the read fails.
        response.close()

    tree = html.fromstring(contents)

    heading = tree.xpath("//h1")[0].text
    subtitle = tree.xpath('//span[@class="subtitle"]')[0].text
    title = heading + ' - ' + subtitle

    tables = tree.xpath("//table[@cellpadding=6]")

    # Single-argument print(...) emits the same output as the Python 2
    # print statement while remaining valid syntax on Python 3.
    print(url)
    print(title)
    return title, tables
|