#!/usr/bin/env python
# Non-championship races dump from chicanef1.
#
# Provenance: added as dumps/chicane-f1-nc.py in commit
# 50eda8dcf4c764493efe3cae4cf81df916ea2e7d (emkael, Fri, 7 Nov 2014).
#
# For every season in [FIRST_YEAR, LAST_YEAR] the script downloads the
# calendar page (requested with nc=1), picks the links found in the first
# cell of every table row whose second cell is labelled "Non-championship",
# and prints, for each such link, one race.pl URL per entry of PAGE_TYPES.
# Nothing is downloaded from the printed URLs; they are only written to
# stdout, one per line.
import contextlib
import urllib
import urllib2
import urlparse

from lxml import html

BASE_URL = 'http://chicanef1.com'
CALENDAR_URL = 'http://chicanef1.com/calendar.pl?'

# Seasons to scan; LAST_YEAR is inclusive.
FIRST_YEAR = 1954
LAST_YEAR = 2014

# Rows of the calendar table marked "Non-championship" in their second cell;
# the anchors of interest live in the first cell of the same row.
NC_LINKS_XPATH = ('//table[@cellpadding=6]//tr/td[2]'
                  '/center[text()="Non-championship"]/../..//td[1]//a')

# Values substituted, in this order, into the `type` query parameter of
# every race link.
PAGE_TYPES = ['h1q', 'heat1', 'heat2', 'agg', 'final', 'qual', 'res']

for year in range(FIRST_YEAR, LAST_YEAR + 1):
    calendar_url = CALENDAR_URL + urllib.urlencode({'year': year, 'nc': 1})
    # urllib2 responses are not context managers in Python 2; closing()
    # guarantees the socket is released even if read() raises.
    with contextlib.closing(urllib2.urlopen(calendar_url)) as response:
        contents = response.read()
    tree = html.fromstring(contents)
    for link in tree.xpath(NC_LINKS_XPATH):
        # Keep the link's query string but always point it at race.pl.
        race_url = urlparse.urlparse(link.attrib['href'])
        race_url = race_url._replace(path='race.pl')
        query = dict(urlparse.parse_qsl(race_url.query))
        for page_type in PAGE_TYPES:
            query['type'] = page_type
            typed_url = race_url._replace(query=urllib.urlencode(query))
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement.
            print(urlparse.urljoin(BASE_URL, urlparse.urlunparse(typed_url)))