summaryrefslogtreecommitdiff
path: root/dumps/second-a-lap.py
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2014-10-12 22:12:30 +0200
committer emkael <emkael@tlen.pl> 2014-10-12 22:12:30 +0200
commit e8adaa0eb15f1b83a430ec35046b4d2756e02885 (patch)
tree 9582d5cc4410e74654b157c4a7f1c270285cebb0 /dumps/second-a-lap.py
parent 3d39c79d532800a8bdb7e410a1d441dfbc31cb01 (diff)
* Second A Lap blog scraper
Diffstat (limited to 'dumps/second-a-lap.py')
-rwxr-xr-x dumps/second-a-lap.py 28
1 files changed, 28 insertions, 0 deletions
diff --git a/dumps/second-a-lap.py b/dumps/second-a-lap.py
new file mode 100755
index 0000000..e8221b5
--- /dev/null
+++ b/dumps/second-a-lap.py
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+from sys import argv
+import urlparse, urllib, urllib2
+import json, pprint
+from lxml import html, etree
+import os, string
+
+def fetch(url):
+ print url
+ contents = json.loads(urllib2.urlopen('http://second-a-lap.blogspot.com/feeds/posts/default?'+urllib.urlencode({ 'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': url })).read())
+ title = contents['feed']['entry'][0]['title']['$t']
+ print title
+ text = contents['feed']['entry'][0]['content']['$t']
+ tree = html.fromstring(text)
+ tables = tree.xpath("//table[@bordercolor]")
+ i = 1
+ for table in tables:
+ name = "".join(x for x in title if x.isalnum()) + '-' + str(i) + '.txt'
+ print name
+ path = open(name, 'w')
+ print >>path, etree.tostring(table)
+ i += 1
+
+if __name__ == "__main__":
+ if len(argv) > 1:
+ url = urlparse.urlparse(argv[1])
+ fetch(url.path)