diff options
Diffstat (limited to 'dumps/_sites')
-rw-r--r-- | dumps/_sites/__init__.py | 0 |
-rw-r--r-- | dumps/_sites/chicane-f1.py | 14 |
-rw-r--r-- | dumps/_sites/second-a-lap.py | 18 |
3 files changed, 32 insertions, 0 deletions
diff --git a/dumps/_sites/__init__.py b/dumps/_sites/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/dumps/_sites/__init__.py
diff --git a/dumps/_sites/chicane-f1.py b/dumps/_sites/chicane-f1.py
new file mode 100644
index 0000000..07d90c9
--- /dev/null
+++ b/dumps/_sites/chicane-f1.py
@@ -0,0 +1,14 @@
+import urllib
+import urllib2
+import urlparse
+
+from lxml import html
+
+def fetch(url):
+    contents = urllib2.urlopen(url).read()
+    tree = html.fromstring(contents)
+    title = tree.xpath("//title")[0].text
+    tables = tree.xpath("//table[@cellpadding=6]")
+    print url
+    print title
+    return title, tables
diff --git a/dumps/_sites/second-a-lap.py b/dumps/_sites/second-a-lap.py
new file mode 100644
index 0000000..52591d6
--- /dev/null
+++ b/dumps/_sites/second-a-lap.py
@@ -0,0 +1,18 @@
+import json
+import urllib
+import urllib2
+import urlparse
+
+from lxml import html
+
+def fetch(url):
+    url = urlparse.urlparse(url).path
+    contents = json.loads(urllib2.urlopen('http://second-a-lap.blogspot.com/feeds/posts/default?' +
+        urllib.urlencode({'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': url})).read())
+    title = contents['feed']['entry'][0]['title']['$t']
+    text = contents['feed']['entry'][0]['content']['$t']
+    tree = html.fromstring(text)
+    tables = tree.xpath("//table[@bordercolor]")
+    print url
+    print title
+    return title, tables