summaryrefslogtreecommitdiff
path: root/charts.py
blob: 379d1381458fe82402670855093d25e7e9401a5e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy.sql import text
from f1elo.db import Session
from itertools import izip
from dateutil import rrule, relativedelta
from datetime import date
import unicodecsv, collections, sys

session = Session()
engine = session.get_bind()
connection = engine.connect()

def fetch_raw():
    raw_queries = {
    'championship-races': '''
    SELECT CONCAT(race, ' (', date, ')') FROM races WHERE _type = 4
    ''',
    'all-time-top-peak': '''
    SELECT drivers.driver,
    rankings.ranking,
    rankings.rank_date
    FROM rankings
    INNER JOIN (
    SELECT MAX(ranking) ranking, _driver FROM rankings
    GROUP BY _driver
    ) r ON r.ranking=rankings.ranking AND r._driver=rankings._driver
    JOIN drivers ON rankings._driver = drivers.id
    ORDER BY rankings.ranking DESC
    LIMIT 0,20
    ''',
    'all-time-top-peak-progression': '''
    SELECT drivers.driver,
    max_date_rankings.max_rank_date,
    max_date_rankings.max_ranking
    FROM max_date_rankings
    INNER JOIN rankings ON (rankings.ranking = max_date_rankings.max_ranking)
    AND (rankings.rank_date = max_date_rankings.max_rank_date)
    LEFT JOIN drivers ON rankings._driver = drivers.id
    WHERE max_ranking > (
    SELECT MAX(mr.max_ranking) FROM max_date_rankings mr
    WHERE mr.max_rank_date < max_date_rankings.max_rank_date
    )
    ''',
    'top-exit-rankings': '''
    SELECT drivers.driver,
    rankings.ranking,
    rankings.rank_date
    FROM rankings
    INNER JOIN (
    SELECT MAX(rank_date) rank_date, _driver FROM rankings GROUP BY _driver
    ) r ON r.rank_date=rankings.rank_date AND r._driver=rankings._driver
    JOIN drivers ON rankings._driver = drivers.id
    WHERE rankings.rank_date < CURDATE() - INTERVAL 1 YEAR
    ORDER BY rankings.ranking DESC
    LIMIT 0, 20
    ''',
    'top-rankings-by-season': '''
    SELECT tpr.date, tpr.driver, tpr.ranking, tpr.rank_date, tpr.position,
    tar.driver, ROUND(tar.average,2), tar.position,
    c.driver
    FROM top_peak_rankings tpr
    JOIN top_average_rankings tar ON tpr.date = tar.date
    JOIN champions c ON c.year = tpr.date
    ''',
    'top-rookie-end-season-rankings': '''
    SELECT drivers.driver, rankings.rank_date, rankings.ranking
    FROM rankings
    JOIN (
    SELECT MAX(rankings.rank_date) rank_date, rankings._driver
    FROM rankings
    JOIN rookie_seasons ON rookie_seasons.year = YEAR(rankings.rank_date)
    AND rookie_seasons._driver = rankings._driver
    GROUP BY rankings._driver
    ) r ON r.rank_date = rankings.rank_date AND r._driver = rankings._driver
    JOIN drivers ON drivers.id = rankings._driver
    ORDER BY rankings.ranking
    DESC LIMIT 0, 20;
    ''',
    'top-rookie-average-rankings': '''
    SELECT drivers.driver, rookie_seasons.year, AVG(rankings.ranking) ranking
    FROM rankings
    JOIN rookie_seasons ON YEAR(rank_date) = rookie_seasons.year
    AND rookie_seasons._driver = rankings._driver
    JOIN drivers ON drivers.id = rankings._driver
    GROUP BY rankings._driver
    ORDER BY ranking DESC
    LIMIT 0,20;
    ''',
    'season-by-season-inflation': '''
    SELECT date,
    MAX(max_ranking),
    MIN(min_ranking),
    AVG(avg_ranking),
    AVG(avg_ranking)+STDDEV(avg_ranking),
    AVG(avg_ranking)-STDDEV(avg_ranking)
    FROM driver_yearly_rankings
    GROUP BY date
    ORDER BY date ASC;
    '''
    }

    for file, query in raw_queries.iteritems():
        csv = unicodecsv.writer(open('charts/' + file + '.csv', 'w'), lineterminator='\n')
        for result in connection.execute(text(query)):
            csv.writerow(result)

def fetch_decades():
    for decade in range(1950, 2025, 5):
        drivers = []
        for year in range(decade, decade + 5):
            for driver_id in connection.execute(text('''
            (SELECT _driver
            FROM driver_yearly_rankings
            WHERE date = :year
            ORDER BY avg_ranking DESC
            LIMIT 0,3)
            UNION DISTINCT
            (SELECT _driver
            FROM driver_yearly_rankings
            WHERE date = :year
            ORDER BY max_ranking DESC
            LIMIT 0,3)
            '''), year=year):
                drivers.append(driver_id[0])
        rankings = connection.execute(text("SELECT rankings.rank_date, MAX(rankings.ranking), rankings._driver, drivers.* FROM rankings JOIN drivers ON drivers.id = rankings._driver WHERE rank_date >= :date_from AND rank_date <= :date_to AND _driver IN :drivers GROUP BY rankings._driver, rankings.rank_date"), date_from=str(decade)+'-01-01', date_to=str(decade+4)+'-12-31', drivers=drivers)
        csv = unicodecsv.writer(open('charts/' + str(decade) + 's.csv', 'w'))
        tree = lambda: collections.defaultdict(tree)
        table = tree()
        dates = set()
        for row in rankings:
            dates.add(str(row[0]))
            table[row[4]][str(row[0])] = row[1]
        dates = list(dates)
        dates.append('')
        dates = sorted(dates)
        output = []
        output.append(dates)
        for driver, results in table.iteritems():
            row = []
            row.append(driver)
            for date in dates:
                if date:
                    if results.has_key(date):
                        row.append(results[date])
                    else:
                        row.append('')
            output.append(row)
        csv.writerows(output)

def fetch_rolling():
    output = []
    min_date = connection.execute(text('SELECT MIN(date) FROM races')).first()[0].replace(day=1)
    for begin_date in list(rrule.rrule(rrule.MONTHLY, dtstart=min_date, until=date.today())):
        end_date = begin_date + relativedelta.relativedelta(months=6)
        sql = 'SELECT AVG(avg), STDDEV(avg), AVG(dev) FROM (SELECT AVG(ranking) avg, STDDEV(ranking) dev FROM rankings WHERE rank_date BETWEEN :begin_date AND :end_date GROUP BY _driver) avg'
        result = connection.execute(text(sql), begin_date=begin_date, end_date=end_date).first()
        output.append([end_date.strftime('%Y-%m')] + result.values())
    unicodecsv.writer(open('charts/rolling_averages.csv', 'w')).writerows(output)

if len(sys.argv) < 2:
    sys.argv.append('all')

if sys.argv[1] == 'sql':
    fetch_raw()
elif sys.argv[1] == 'decades':
    fetch_decades()
elif sys.argv[1] == 'rolling':
    fetch_rolling()
elif sys.argv[1] == 'all':
    fetch_raw()
    fetch_decades()
    fetch_rolling()