summaryrefslogtreecommitdiff
path: root/dumps/chicane-f1.py
blob: 2d9b4cd7457ea1eb0a5ea851db41650b2ad057bd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/usr/bin/env python
import urllib
import urllib2
import urlparse

from lxml import html

# Walk the chicanef1.com season calendars for 1954 through 2014 and print, for
# every race linked from a calendar, one race.pl URL per session type.
BASE_URL = 'http://chicanef1.com'
# Values written into the 'type' query parameter of race.pl
# (presumably qualifying and pre-qualifying -- confirm against the site).
SESSION_TYPES = ('qual', 'preq')

for year in range(1954, 2015):
    calendar_url = BASE_URL + '/calendar.pl?' + urllib.urlencode({'year':year,'nc':0})

    # Close the response explicitly: the loop performs one request per season
    # and would otherwise leave every connection open until garbage collection.
    response = urllib2.urlopen(calendar_url)
    try:
        contents = response.read()
    finally:
        response.close()

    tree = html.fromstring(contents)
    # Links found in the first cell of each row of the cellpadding=6 table.
    for link in tree.xpath('//table[@cellpadding=6]//tr/td[1]//a'):
        href = link.get('href')
        if href is None:
            # An <a> without href (e.g. a named anchor) has no target to
            # rewrite; skip it instead of dying with KeyError.
            continue

        # Keep the link's query string but point it at race.pl.
        race_url = urlparse.urlparse(href)._replace(path='race.pl')
        query = dict(urlparse.parse_qsl(race_url.query))

        for session_type in SESSION_TYPES:
            # Overwrites any 'type' parameter already present in the link.
            query['type'] = session_type
            session_url = race_url._replace(query=urllib.urlencode(query))
            # urljoin turns a relative link into an absolute one on BASE_URL.
            # Single-argument print(...) prints the same text as the print
            # statement under Python 2.
            print(urlparse.urljoin(BASE_URL, urlparse.urlunparse(session_url)))