import urllib2
from bs4 import BeautifulSoup as bs
import sys, os, hashlib, re
from urlparse import urljoin
from math import floor

# Start page to scrape, taken from the first command-line argument; the links
# to the per-round pages are collected from it further down.
results_url = sys.argv[1]

def fetch_url(url):
    """Return the body of *url*, downloading it at most once.

    Responses are cached on disk as ``cache/<sha224 hex digest of url>``
    (relative to the current working directory).  A cached copy is returned
    without touching the network; otherwise the page is downloaded, stored
    and returned.

    :param url: address to fetch (byte string).
    :returns: the raw response body.
    :raises urllib2.URLError: if the page is not cached and cannot be fetched.
    """
    cache_dir = 'cache'
    cache_path = os.path.join(cache_dir, hashlib.sha224(url).hexdigest())

    if os.path.exists(cache_path):
        # Binary mode: the payload is stored exactly as it was received.
        with open(cache_path, 'rb') as cached:
            return cached.read()

    response = urllib2.urlopen(url)
    try:
        r_content = response.read()
    finally:
        response.close()

    # Create the cache directory on first use instead of failing on write
    # after the download has already happened.
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    with open(cache_path, 'wb') as cached:
        cached.write(r_content)
    return r_content

# Parse the start page and keep the href of every link that points at a
# per-round team listing (RoundTeams.asp).
results = bs(fetch_url(results_url), 'lxml')

round_links = [
    anchor['href']
    for anchor in results.select('a[href]')
    if '/RoundTeams.asp' in anchor['href']
]

class Round:
    """A single round, described by the page it was scraped from.

    Two rounds are equal when both their tournament and their name match;
    ordering (``>``) compares the numeric round number.
    """
    tournament = None  # tournament this round belongs to
    url = ''           # address of the round page
    name = ''          # round label, e.g. 'Round 3'
    content = ''       # parsed round page
    tables = None      # per-instance list, created in __init__
    boards = None      # per-instance dict, created in __init__

    def __init__(self):
        # Created per instance so that rounds never share containers.
        self.tables = []
        self.boards = {}

    def number(self):
        """Return the round number parsed from ``name`` ('Round 3' -> 3).

        :raises ValueError: if what remains after removing the word
            "round" is not an integer.
        """
        # Raw string with \s: in a plain literal '\b' is a backspace
        # character, so the previous pattern only removed the bare word and
        # relied on int() to ignore the leftover whitespace.
        return int(re.sub(r'\s*round\s*', '', self.name, flags=re.I))

    def __repr__(self):
        return self.tournament.name + ': ' + self.name

    def __eq__(self, other):
        return self.tournament == other.tournament and self.name == other.name

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self == other

    def __gt__(self, other):
        return self.number() > other.number()

# Download every distinct round page and describe it with a Round object.
# The cells of the first table row are split in two groups: those starting
# with "round" form the round name, the remaining ones form the tournament
# name.  Rounds that compare equal to an already collected one are dropped.
round_regex = re.compile('^round ', flags=re.I)
rounds = []
for r in set(round_links):
    url = urljoin(results_url, r)
    content = bs(fetch_url(url), 'lxml')
    first_row = content.select('table tr')[0]

    round_cells = []
    other_cells = []
    for cell in first_row.select('td'):
        label = cell.text.strip()
        if round_regex.match(label):
            round_cells.append(label)
        else:
            other_cells.append(label)

    new_round = Round()
    new_round.url = url
    new_round.content = content
    new_round.tournament = ' - '.join(other_cells)
    new_round.name = ' '.join(round_cells)
    if new_round not in rounds:
        rounds.append(new_round)

# Group the collected rounds by tournament name.
tournament_data = {}
for r in rounds:
    tournament_data.setdefault(r.tournament, []).append(r)

class Tournament:
    """A named collection of rounds together with the pairs seen in it.

    Constructing a tournament points every given round's ``tournament``
    attribute at the new instance.
    """
    lineup = None  # per-instance list, created in __init__
    rounds = None  # the rounds handed to __init__
    name = ''

    def __init__(self, name, rounds):
        self.name = name
        self.rounds = rounds
        self.lineup = []
        # Link each round back to the tournament that owns it.
        for member in self.rounds:
            member.tournament = self

    def __repr__(self):
        summary = (self.name, len(self.rounds))
        return '%s (%d rounds)' % summary

# One Tournament object per distinct tournament name.
tournaments = [
    Tournament(title, members)
    for title, members in tournament_data.items()
]

class Table:
    """Container for one scraped page and the results read from it."""
    content = ''    # parsed page, assigned by the caller
    results = None  # per-instance list, created in __init__

    def __init__(self):
        # A fresh list per table so instances never share results.
        self.results = list()

class Pair:
    """Two players identified by name, plus the team they play for.

    Equality and hashing depend only on the two names, in any order; the
    ``nation`` label takes no part in either.
    """
    first_name = ''
    second_name = ''
    nation = ''
    results = None  # per-instance list, created in __init__

    def __init__(self, name1, name2, nation):
        self.results = []
        self.first_name = name1
        self.second_name = name2
        self.nation = nation

    def __eq__(self, other):
        return ' - '.join(sorted([self.first_name, self.second_name])) == \
            ' - '.join(sorted([other.first_name, other.second_name]))

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        return not self == other

    def __repr__(self):
        return '%s - %s (%s)' % (self.first_name, self.second_name, self.nation)

    def __hash__(self):
        # Hash exactly the key __eq__ compares.  Hashing the repr (name order
        # and nation included) gave equal pairs different hashes, which breaks
        # dict and set lookups.
        return hash(' - '.join(sorted([self.first_name, self.second_name])))

class Result:
    """The outcome of one board for one NS pair against one EW pair.

    Creating a result appends it to the ``results`` list of both pairs.
    The three IMP figures start at 0 and are assigned by code outside this
    class.  Results order by raw score.
    """
    ns_pair = None
    ew_pair = None
    tour_round = None
    board_no = 0
    score = 0
    butler = 0
    cutoff_butler = 0
    cavendish = 0

    def __init__(self, ns, ew, score, rnd, board):
        self.tour_round = rnd
        self.board_no = board
        self.score = score
        self.ns_pair = ns
        self.ew_pair = ew
        # Register with NS first, then EW.
        for side in (self.ns_pair, self.ew_pair):
            side.results.append(self)

    def __gt__(self, other):
        return self.score > other.score

    def __repr__(self):
        fields = (
            self.tour_round.number(),
            self.board_no,
            self.score,
            self.butler,
            self.cutoff_butler,
            self.cavendish,
        )
        return '%d-%d\t%d\t%d\t%d\t%f' % fields

# Walk every BoardDetails page linked from every round.  A page is only
# turned into results when it names exactly two teams and eight players;
# every page, usable or not, is still recorded as a Table of its round.
for tour in tournaments:
    for r in tour.rounds:
        table_urls = []
        for link in r.content.select('a[href]'):
            if 'BoardDetails.asp' in link['href']:
                table_urls.append(urljoin(r.url, link['href']))

        for url in table_urls:
            table = Table()
            table.content = bs(fetch_url(url), 'lxml')

            team_links = []
            for link in table.content.select('div[align] a[href]'):
                if 'TeamDetails.asp' in link['href']:
                    team_links.append(link)

            if len(team_links) == 2:
                home_team = team_links[0].text
                away_team = team_links[1].text

                players = []
                for link in table.content.select('a[href]'):
                    if 'people/person.asp' in link['href']:
                        players.append(link.text.strip())

                if len(players) == 8:
                    pairs = [
                        Pair(players[0], players[6], home_team), # open
                        Pair(players[4], players[5], home_team), # closed
                        Pair(players[2], players[3], away_team), # open
                        Pair(players[1], players[7], away_team)  # closed
                    ]
                    # Reuse the pair already known to the tournament, if any,
                    # so that all of its results end up on one object.
                    for slot, candidate in enumerate(pairs):
                        if candidate in tour.lineup:
                            pairs[slot] = tour.lineup[tour.lineup.index(candidate)]
                        else:
                            tour.lineup.append(candidate)

                    # Empty cells count as 0.
                    result_cells = []
                    for cell in table.content.select('tr[nowrap] b'):
                        text = cell.text.strip()
                        result_cells.append(int(text) if len(text) > 0 else 0)

                    # Cells come in groups of six per board: the first two give
                    # the open-room score, the next two the closed-room score.
                    board_count = len(result_cells) // 6
                    open_scores = [result_cells[6*n] - result_cells[6*n + 1]
                                   for n in range(board_count)]
                    closed_scores = [result_cells[6*n + 2] - result_cells[6*n + 3]
                                     for n in range(board_count)]

                    for board, score in enumerate(open_scores, 1):
                        table.results.append(Result(pairs[0], pairs[2], score, r, board))
                    for board, score in enumerate(closed_scores, 1):
                        table.results.append(Result(pairs[3], pairs[1], score, r, board))
            r.tables.append(table)

    # Index every result of the round by its board number.
    for r in tour.rounds:
        for table in r.tables:
            for result in table.results:
                r.boards.setdefault(result.board_no, []).append(result)

def imp(res1, res2):
    """Convert the difference ``res1 - res2`` into IMPs.

    The magnitude is the number of scale steps the absolute difference
    reaches; the sign follows the sign of the difference.
    """
    margin = res1 - res2
    scale = (
        20, 50, 90, 130, 170, 220, 270, 320, 370, 430,
        500, 600, 750, 900, 1100, 1300, 1500, 1750,
        2000, 2250, 2500, 3000, 3500, 4000,
    )
    size = abs(margin)
    magnitude = sum(1 for step in scale if step <= size)
    if margin < 0:
        return -magnitude
    return magnitude

def get_datum(board):
    """Return the mean ``score`` of the results in *board*, rounded to a multiple of 10."""
    total = 0
    for entry in board:
        total += entry.score
    mean = float(total) / len(board)
    return 10 * int(round(mean / 10))

for tour in tournaments:

    if len(tour.lineup) == 0:
        continue

    for rnd in tour.rounds:
        for i, board in rnd.boards.iteritems():
            datum = get_datum(board)
            cutoff = int(floor(len(board) / 4))
            cutoff_results = sorted(board)
            cutoff_datum = get_datum(cutoff_results[cutoff:-cutoff])
            for r in board:
                r.butler = imp(r.score, datum)
                r.cutoff_butler = imp(r.score, cutoff_datum)
                r.cavendish = float(sum([imp(r.score, other.score) for other in board if r <> other])) / float((len(board) - 1))

    print tour.name

    for pair in tour.lineup:
        print pair
        result_table = []
        for res in pair.results:
            ew = -1 if res.ew_pair == pair else 1
            result_table.append([
                res.tour_round.number(),
                res.board_no,
                res.score,
                ew * res.butler,
                ew * res.cutoff_butler,
                ew * res.cavendish
            ])
        for r in sorted(result_table, cmp=lambda x,y: cmp(x[0], y[0]) or cmp(x[1], y[1])):
            print '\t'.join([str(s) for s in r[:3]] + [str(round(s, 2)) for s in r[3:]])
        print '\t'.join([
            str(len(result_table)),
            '',
            '',
            str(round(float(sum([r[3] for r in result_table])) / float(len(result_table)), 2)),
            str(round(float(sum([r[4] for r in result_table])) / float(len(result_table)), 2)),
            str(round(float(sum([r[5] for r in result_table])) / float(len(result_table)), 2))
        ])
        print
    print

    head_to_head = {}
    for r in tour.rounds:
        for table in r.tables:
            for result in table.results:
                if result.ns_pair not in head_to_head:
                    head_to_head[result.ns_pair] = {}
                if result.ew_pair not in head_to_head[result.ns_pair]:
                    head_to_head[result.ns_pair][result.ew_pair] = []
                if result.ew_pair not in head_to_head:
                    head_to_head[result.ew_pair] = {}
                if result.ns_pair not in head_to_head[result.ew_pair]:
                    head_to_head[result.ew_pair][result.ns_pair] = []
                head_to_head[result.ns_pair][result.ew_pair].append([
                    result.butler,
                    result.cutoff_butler,
                    result.cavendish
                ])
                head_to_head[result.ew_pair][result.ns_pair].append([
                    -result.butler,
                    -result.cutoff_butler,
                    -result.cavendish
                ])
    for ns in head_to_head:
        for ew in head_to_head[ns]:
            count = float(len(head_to_head[ns][ew]))
            head_to_head[ns][ew] = {
                'butler': float(sum([r[0] for r in head_to_head[ns][ew]])) / count,
                'cutoff_butler': float(sum([r[1] for r in head_to_head[ns][ew]])) / count,
                'cavendish': float(sum([r[2] for r in head_to_head[ns][ew]])) / count,
                'count': count
            }
    normalized = {}
    for ns in head_to_head:
        print ns
        if ns not in normalized:
            normalized[ns] = {
                'butler': 0,
                'cutoff_butler': 0,
                'cavendish': 0,
                'count': 0
            }
        for ew in head_to_head[ns]:
            print 'Against %s' % ew
            head_to_head[ns][ew]['opposition'] = {
                'butler': 0,
                'cutoff_butler': 0,
                'cavendish': 0,
                'count': 0
            }
            for opposition in head_to_head[ew]:
                if opposition != ns:
                    head_to_head[ns][ew]['opposition']['butler'] += head_to_head[ew][opposition]['butler'] * head_to_head[ew][opposition]['count']
                    head_to_head[ns][ew]['opposition']['cutoff_butler'] += head_to_head[ew][opposition]['cutoff_butler'] * head_to_head[ew][opposition]['count']
                    head_to_head[ns][ew]['opposition']['cavendish'] += head_to_head[ew][opposition]['cavendish'] * head_to_head[ew][opposition]['count']
                    head_to_head[ns][ew]['opposition']['count'] += head_to_head[ew][opposition]['count']
            if head_to_head[ns][ew]['opposition']['count'] > 0:
                head_to_head[ns][ew]['opposition']['butler'] /= head_to_head[ns][ew]['opposition']['count']
                head_to_head[ns][ew]['opposition']['cutoff_butler'] /= head_to_head[ns][ew]['opposition']['count']
                head_to_head[ns][ew]['opposition']['cavendish'] /= head_to_head[ns][ew]['opposition']['count']
            print '%.2f\t%.2f\t%.2f' % (
                head_to_head[ns][ew]['butler'],
                head_to_head[ns][ew]['cutoff_butler'],
                head_to_head[ns][ew]['cavendish']
            )
            print 'Opposition average (%d boards)' % head_to_head[ns][ew]['opposition']['count']
            print '%.2f\t%.2f\t%.2f' % (
                head_to_head[ns][ew]['opposition']['butler'],
                head_to_head[ns][ew]['opposition']['cutoff_butler'],
                head_to_head[ns][ew]['opposition']['cavendish']
            )
            print 'Normalized'
            normalized_butler = head_to_head[ns][ew]['butler'] + head_to_head[ns][ew]['opposition']['butler']
            normalized_cutoff = head_to_head[ns][ew]['cutoff_butler'] + head_to_head[ns][ew]['opposition']['cutoff_butler']
            normalized_cavendish = head_to_head[ns][ew]['cavendish'] + head_to_head[ns][ew]['opposition']['cavendish']
            print '%.2f\t%.2f\t%.2f' % (
                normalized_butler,
                normalized_cutoff,
                normalized_cavendish
            )
            normalized[ns]['butler'] += normalized_butler * head_to_head[ns][ew]['count']
            normalized[ns]['cutoff_butler'] += normalized_cutoff * head_to_head[ns][ew]['count']
            normalized[ns]['cavendish'] += normalized_cavendish * head_to_head[ns][ew]['count']
            normalized[ns]['count'] += head_to_head[ns][ew]['count']
        print
    print 'Normalized totals'
    for pair in normalized:
        print pair
        print '%.2f\t%.2f\t%.2f\t%d' % (
            normalized[pair]['butler'] / normalized[pair]['count'],
            normalized[pair]['cutoff_butler'] / normalized[pair]['count'],
            normalized[pair]['cavendish'] / normalized[pair]['count'],
            normalized[pair]['count']
        )
    print