From b13813f96822ed7de314b6dd417831de8a641134 Mon Sep 17 00:00:00 2001
From: emkael
Date: Fri, 31 Oct 2014 21:45:51 +0100
Subject: * revised and reorganized dump scripts

---
 dumps/_sites/__init__.py     |  0
 dumps/_sites/chicane-f1.py   | 29 +++++++++++++++++++++++++++++
 dumps/_sites/second-a-lap.py | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+)
 create mode 100644 dumps/_sites/__init__.py
 create mode 100644 dumps/_sites/chicane-f1.py
 create mode 100644 dumps/_sites/second-a-lap.py

(limited to 'dumps/_sites')

diff --git a/dumps/_sites/__init__.py b/dumps/_sites/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dumps/_sites/chicane-f1.py b/dumps/_sites/chicane-f1.py
new file mode 100644
index 0000000..07d90c9
--- /dev/null
+++ b/dumps/_sites/chicane-f1.py
@@ -0,0 +1,29 @@
+import urllib
+import urllib2
+import urlparse
+
+from lxml import html
+
+
+def fetch(url):
+    """Download *url* and return its title and data tables.
+
+    url -- address of the page to download.
+
+    Prints the URL and the page title, then returns a (title, tables)
+    tuple: the text of the first <title> element and the list of <table>
+    elements whose cellpadding attribute equals 6.  Raises IndexError
+    when the page has no <title> element.
+    """
+    response = urllib2.urlopen(url)
+    try:
+        contents = response.read()
+    finally:
+        # Release the connection even if reading the body fails.
+        response.close()
+    tree = html.fromstring(contents)
+    title = tree.xpath("//title")[0].text
+    tables = tree.xpath("//table[@cellpadding=6]")
+    print url
+    print title
+    return title, tables
diff --git a/dumps/_sites/second-a-lap.py b/dumps/_sites/second-a-lap.py
new file mode 100644
index 0000000..52591d6
--- /dev/null
+++ b/dumps/_sites/second-a-lap.py
@@ -0,0 +1,35 @@
+import json
+import urllib
+import urllib2
+import urlparse
+
+from lxml import html
+
+
+def fetch(url):
+    """Fetch a post through the blog's JSON posts feed.
+
+    url -- address of the post; only its path component is used, as the
+    'path' parameter of the feed query.
+
+    Prints the post path and title, then returns a (title, tables) tuple:
+    the title of the first feed entry and the list of <table> elements in
+    that entry's content that carry a bordercolor attribute.
+    """
+    path = urlparse.urlparse(url).path
+    query = urllib.urlencode(
+        {'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': path})
+    response = urllib2.urlopen(
+        'http://second-a-lap.blogspot.com/feeds/posts/default?' + query)
+    try:
+        contents = json.loads(response.read())
+    finally:
+        # Release the connection even if reading or decoding fails.
+        response.close()
+    entry = contents['feed']['entry'][0]
+    title = entry['title']['$t']
+    tree = html.fromstring(entry['content']['$t'])
+    tables = tree.xpath("//table[@bordercolor]")
+    print path
+    print title
+    return title, tables
-- 
cgit v1.2.3