1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy.sql import text
from f1elo.db import Session
from itertools import izip
from dateutil import rrule, relativedelta
from datetime import date
import unicodecsv, collections, sys
session = Session()
engine = session.get_bind()
connection = engine.connect()
def fetch_raw():
raw_queries = {
'championship-races': '''
SELECT CONCAT(race, ' (', date, ')') FROM races WHERE _type = 4
''',
'all-time-top-peak': '''
SELECT drivers.driver,
rankings.ranking,
rankings.rank_date
FROM rankings
INNER JOIN (
SELECT MAX(ranking) ranking, _driver FROM rankings
GROUP BY _driver
) r ON r.ranking=rankings.ranking AND r._driver=rankings._driver
JOIN drivers ON rankings._driver = drivers.id
GROUP BY rankings._driver
ORDER BY rankings.ranking DESC
LIMIT 0,20
''',
'all-time-top-peak-progression': '''
SELECT drivers.driver,
max_date_rankings.max_rank_date,
max_date_rankings.max_ranking
FROM max_date_rankings
INNER JOIN rankings ON (rankings.ranking = max_date_rankings.max_ranking)
AND (rankings.rank_date = max_date_rankings.max_rank_date)
LEFT JOIN drivers ON rankings._driver = drivers.id
WHERE max_ranking > (
SELECT MAX(mr.max_ranking) FROM max_date_rankings mr
WHERE mr.max_rank_date < max_date_rankings.max_rank_date
)
''',
'top-exit-rankings': '''
SELECT drivers.driver,
rankings.ranking,
rankings.rank_date
FROM rankings
INNER JOIN (
SELECT MAX(rank_date) rank_date, _driver FROM rankings GROUP BY _driver
) r ON r.rank_date=rankings.rank_date AND r._driver=rankings._driver
JOIN drivers ON rankings._driver = drivers.id
WHERE rankings.rank_date < CURDATE() - INTERVAL 1 YEAR
ORDER BY rankings.ranking DESC
LIMIT 0, 20
''',
'top-rankings-by-season': '''
SELECT tpr.date, tpr.driver, tpr.ranking, tpr.rank_date, tpr.position,
tar.driver, ROUND(tar.average,2), tar.position,
c.driver
FROM top_peak_rankings tpr
JOIN top_average_rankings tar ON tpr.date = tar.date
JOIN champions c ON c.year = tpr.date
''',
'top-rookie-end-season-rankings': '''
SELECT drivers.driver, rankings.rank_date, rankings.ranking
FROM rankings
JOIN (
SELECT MAX(rankings.rank_date) rank_date, rankings._driver
FROM rankings
JOIN rookie_seasons ON rookie_seasons.year = YEAR(rankings.rank_date)
AND rookie_seasons._driver = rankings._driver
GROUP BY rankings._driver
) r ON r.rank_date = rankings.rank_date AND r._driver = rankings._driver
JOIN drivers ON drivers.id = rankings._driver
ORDER BY rankings.ranking
DESC LIMIT 0, 20;
''',
'top-rookie-average-rankings': '''
SELECT drivers.driver, rookie_seasons.year, AVG(rankings.ranking) ranking
FROM rankings
JOIN rookie_seasons ON YEAR(rank_date) = rookie_seasons.year
AND rookie_seasons._driver = rankings._driver
JOIN drivers ON drivers.id = rankings._driver
GROUP BY rankings._driver
ORDER BY ranking DESC
LIMIT 0,20;
''',
'season-by-season-inflation': '''
SELECT date,
MAX(max_ranking),
MIN(min_ranking),
AVG(avg_ranking),
AVG(avg_ranking)+STDDEV(avg_ranking),
AVG(avg_ranking)-STDDEV(avg_ranking)
FROM driver_yearly_rankings
GROUP BY date
ORDER BY date ASC;
'''
}
for file, query in raw_queries.iteritems():
csv = unicodecsv.writer(open('charts/' + file + '.csv', 'w'), lineterminator='\n')
for result in connection.execute(text(query)):
csv.writerow(result)
def fetch_decades():
for decade in range(1950, 2020, 5):
drivers = []
for year in range(decade, decade + 5):
for driver_id in connection.execute(text('''
(SELECT _driver
FROM driver_yearly_rankings
WHERE date = :year
ORDER BY avg_ranking DESC
LIMIT 0,3)
UNION DISTINCT
(SELECT _driver
FROM driver_yearly_rankings
WHERE date = :year
ORDER BY max_ranking DESC
LIMIT 0,3)
'''), year=year):
drivers.append(driver_id[0])
rankings = connection.execute(text("SELECT rankings.id, rankings.rank_date, MAX(rankings.ranking), rankings._driver, drivers.* FROM rankings JOIN drivers ON drivers.id = rankings._driver WHERE rank_date >= :date_from AND rank_date <= :date_to AND _driver IN :drivers GROUP BY rankings._driver, rankings.rank_date"), date_from=str(decade)+'-01-01', date_to=str(decade+4)+'-12-31', drivers=drivers)
csv = unicodecsv.writer(open('charts/' + str(decade) + 's.csv', 'w'))
tree = lambda: collections.defaultdict(tree)
table = tree()
dates = set()
for row in rankings:
dates.add(str(row[1]))
table[row[5]][str(row[1])] = row[2]
dates = list(dates)
dates.append('')
dates = sorted(dates)
output = []
output.append(dates)
for driver, results in table.iteritems():
row = []
row.append(driver)
for date in dates:
if date:
if results.has_key(date):
row.append(results[date])
else:
row.append('')
output.append(row)
csv.writerows(output)
def fetch_rolling():
output = []
min_date = connection.execute(text('SELECT MIN(date) FROM races')).first()[0].replace(day=1)
for begin_date in list(rrule.rrule(rrule.MONTHLY, dtstart=min_date, until=date.today())):
end_date = begin_date + relativedelta.relativedelta(months=6)
sql = 'SELECT AVG(avg), STDDEV(avg), AVG(dev) FROM (SELECT AVG(ranking) avg, STDDEV(ranking) dev FROM rankings WHERE rank_date BETWEEN :begin_date AND :end_date GROUP BY _driver) avg'
result = connection.execute(text(sql), begin_date=begin_date, end_date=end_date).first()
output.append([end_date.strftime('%Y-%m')] + result.values())
unicodecsv.writer(open('charts/rolling_averages.csv', 'w')).writerows(output)
if len(sys.argv) < 2:
sys.argv.append('all')
if sys.argv[1] == 'sql':
fetch_raw()
elif sys.argv[1] == 'decades':
fetch_decades()
elif sys.argv[1] == 'rolling':
fetch_rolling()
elif sys.argv[1] == 'all':
fetch_raw()
fetch_decades()
fetch_rolling()
|