diff options
Diffstat (limited to 'dumps/second-a-lap.py')
-rwxr-xr-x | dumps/second-a-lap.py | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/dumps/second-a-lap.py b/dumps/second-a-lap.py index 867d7d3..a6665c9 100755 --- a/dumps/second-a-lap.py +++ b/dumps/second-a-lap.py @@ -11,14 +11,14 @@ import urllib2 import urlparse from sys import argv -from lxml import etree, html - from bs4 import BeautifulSoup +from lxml import etree, html def fetch(url): print url - contents = json.loads(urllib2.urlopen('http://second-a-lap.blogspot.com/feeds/posts/default?'+urllib.urlencode({ 'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': url })).read()) + contents = json.loads(urllib2.urlopen('http://second-a-lap.blogspot.com/feeds/posts/default?' + + urllib.urlencode({'alt': 'json', 'v': 2, 'dynamicviews': 1, 'path': url})).read()) title = contents['feed']['entry'][0]['title']['$t'] print title text = contents['feed']['entry'][0]['content']['$t'] @@ -31,13 +31,19 @@ def fetch(url): path = open(os.path.join('second-a-lap', name + '.txt'), 'w') table = etree.tostring(table) print >>path, table - csv_file = csv.writer(open(os.path.join('second-a-lap', 'csv', name + '.csv'), 'w')) + csv_file = csv.writer( + open(os.path.join('second-a-lap', 'csv', name + '.csv'), 'w')) soup = BeautifulSoup(table) for row in soup.find_all('tr'): - row = map(lambda t: re.sub('\s+', ' ', " ".join(t.stripped_strings)).encode('utf-8'), row.find_all(re.compile('t[dh]'))) + row = map( + lambda t: re.sub('\s+', + ' ', + " ".join(t.stripped_strings)).encode('utf-8'), + row.find_all(re.compile('t[dh]'))) csv_file.writerow(row) i += 1 + def compile(files): headers = set() values = [] @@ -57,7 +63,7 @@ def compile(files): data['Race'] = race_id i += 1 values.append(data) - writer.writerow([race_id,path,'','','']) + writer.writerow([race_id, path, '', '', '']) race_id += 1 except IOError: pass |