diff options
author | emkael <emkael@tlen.pl> | 2014-10-31 21:45:51 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2014-10-31 21:45:51 +0100 |
commit | b13813f96822ed7de314b6dd417831de8a641134 (patch) | |
tree | 0dc65154fd9531a31732e422619bb5b7aaebe775 /dumps/_sites | |
parent | ae189fadac1f56b2c10f2a5d11bad99980a8fd46 (diff) |
* revised and reorganized dump scripts
Diffstat (limited to 'dumps/_sites')
-rw-r--r-- | dumps/_sites/__init__.py | 0 | ||||
-rw-r--r-- | dumps/_sites/chicane-f1.py | 14 | ||||
-rw-r--r-- | dumps/_sites/second-a-lap.py | 18 |
3 files changed, 32 insertions, 0 deletions
diff --git a/dumps/_sites/__init__.py b/dumps/_sites/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/dumps/_sites/__init__.py
diff --git a/dumps/_sites/chicane-f1.py b/dumps/_sites/chicane-f1.py
new file mode 100644
index 0000000..07d90c9
--- /dev/null
+++ b/dumps/_sites/chicane-f1.py
@@ -0,0 +1,14 @@
+import urllib
+import urllib2
+import urlparse
+
+from lxml import html
+
+def fetch(url):
+    contents = urllib2.urlopen(url).read()
+    tree = html.fromstring(contents)
+    title = tree.xpath("//title")[0].text
+    tables = tree.xpath("//table[@cellpadding=6]")
+    print url
+    print title
+    return title, tables
diff --git a/dumps/_sites/second-a-lap.py b/dumps/_sites/second-a-lap.py
new file mode 100644
index 0000000..52591d6
--- /dev/null
+++ b/dumps/_sites/second-a-lap.py
@@ -0,0 +1,18 @@
+import json
+import urllib
+import urllib2
+import urlparse
+
+from lxml import html
+
+def fetch(url):
+    url = urlparse.urlparse(url).path
+    contents = json.loads(urllib2.urlopen('http://second-a-lap.blogspot.com/feeds/posts/default?' +
+        urllib.urlencode({'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': url})).read())
+    title = contents['feed']['entry'][0]['title']['$t']
+    text = contents['feed']['entry'][0]['content']['$t']
+    tree = html.fromstring(text)
+    tables = tree.xpath("//table[@bordercolor]")
+    print url
+    print title
+    return title, tables