blob: 07d90c91ade558f1aa8a67ff58363059fca4e670 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
import urllib
import urllib2
import urlparse
from lxml import html
def fetch(url):
    """Download *url*, parse it as HTML and pull out its title and tables.

    The URL and the extracted title are echoed to standard output.

    Args:
        url: Address of the page to download; passed to ``urllib2.urlopen``.

    Returns:
        A ``(title, tables)`` tuple. ``title`` is the text of the first
        ``<title>`` element, or ``None`` when the page has no ``<title>``
        element or the element has no text. ``tables`` is the list of
        ``<table>`` elements matched by the XPath
        ``//table[@cellpadding=6]``.

    Raises:
        Whatever ``urllib2.urlopen`` or ``lxml.html.fromstring`` raise for
        an unreachable URL or an unparsable document is propagated.
    """
    response = urllib2.urlopen(url)
    try:
        contents = response.read()
    finally:
        # Release the connection promptly instead of waiting for the
        # garbage collector to reclaim the response object.
        response.close()

    tree = html.fromstring(contents)

    # Pages without a <title> yield an empty match list; report that as
    # None rather than failing with IndexError.
    title_nodes = tree.xpath("//title")
    title = title_nodes[0].text if title_nodes else None

    tables = tree.xpath("//table[@cellpadding=6]")

    # Single-argument parenthesised form prints the same under Python 2.
    print(url)
    print(title)
    return title, tables
|