summaryrefslogtreecommitdiff
path: root/dumps/second-a-lap.py
blob: e8221b50ecca1d4121d5481d1391b8f5428f8268 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python

from sys import argv
import urlparse, urllib, urllib2
import json, pprint
from lxml import html, etree
import os, string

def fetch(url):
    """Dump the bordered tables of one blog post into numbered text files.

    The post is looked up through the second-a-lap.blogspot.com JSON feed,
    with ``url`` sent as the feed's ``path`` query parameter, and only the
    first entry of the response is used.  Every ``<table>`` element in that
    entry's HTML content that carries a ``bordercolor`` attribute is
    serialised with ``etree.tostring`` and written to ``<title>-<n>.txt``
    (a relative name, so it lands in the current working directory), where
    ``<title>`` is the post title reduced to its alphanumeric characters
    and ``<n>`` numbers the tables starting at 1.

    The requested path, the post title and each output file name are
    echoed to stdout as progress output.

    Args:
        url: Path component of the post URL; the script entry point passes
            ``urlparse.urlparse(...).path``.

    Returns:
        None.

    Raises:
        KeyError, IndexError: if the decoded feed has no
            ``feed -> entry[0] -> title/content -> $t`` structure.
        Errors from ``urllib2.urlopen``, ``json.loads``, ``html.fromstring``
        and ``open`` propagate unchanged.
    """
    # Single-argument print(x) behaves identically to ``print x`` in Python 2.
    print(url)
    query = urllib.urlencode({ 'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': url })
    response = urllib2.urlopen('http://second-a-lap.blogspot.com/feeds/posts/default?' + query)
    try:
        contents = json.loads(response.read())
    finally:
        # Release the HTTP connection even when the body is not valid JSON.
        response.close()

    entry = contents['feed']['entry'][0]
    title = entry['title']['$t']
    print(title)
    tree = html.fromstring(entry['content']['$t'])
    tables = tree.xpath("//table[@bordercolor]")

    # The file-name stem depends only on the title, so build it once.
    stem = "".join(ch for ch in title if ch.isalnum())
    for index, table in enumerate(tables, 1):
        name = stem + '-' + str(index) + '.txt'
        print(name)
        # ``with`` closes (and therefore flushes) each file deterministically
        # instead of leaving the handle open until garbage collection.
        with open(name, 'w') as out:
            out.write(etree.tostring(table) + '\n')

if __name__ == "__main__":
    # Script entry point: the first command-line argument is a post URL.
    # Only its path component is handed to fetch(); with no argument the
    # script does nothing.
    cli_args = argv[1:]
    if cli_args:
        fetch(urlparse.urlparse(cli_args[0]).path)