summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoremkael <emkael@tlen.pl>2019-02-13 13:02:22 +0300
committeremkael <emkael@tlen.pl>2019-02-13 13:02:22 +0300
commitdf98a7b3539c6352eb2abc2e6ac46fe1011c1445 (patch)
tree30a201b6769c039f9db538216abd28974663e45d
parentd4c80671bc76966b14d7ae3112d96ed8114cf7cc (diff)
Fetching participants list
-rw-r--r--fetcher.py56
1 files changed, 49 insertions, 7 deletions
diff --git a/fetcher.py b/fetcher.py
index 3395941..291a71b 100644
--- a/fetcher.py
+++ b/fetcher.py
@@ -1,13 +1,16 @@
import urllib2
from bs4 import BeautifulSoup as bs
+from urlparse import urljoin
import sys, os, hashlib, re
def fetch_url(url):
round_hash = hashlib.sha224(url).hexdigest()
cache_path = os.path.join('cache', round_hash)
if not os.path.exists(cache_path):
+ print 'Fetching: %s' % (url)
r_content = urllib2.urlopen(url).read()
file(cache_path, 'w').write(r_content)
+ print 'Done.'
else:
r_content = file(cache_path).read()
return r_content
@@ -34,13 +37,12 @@ class Event(object):
session_number = int(session_link.group(2))
session_group = int(session_link.group(3))
if tournament_id not in self.tournaments:
- self.tournaments[tournament_id] = Tournament()
+ self.tournaments[tournament_id] = Tournament(self)
self.tournaments[tournament_id].id = tournament_id
name = link.text.split()
if len(name) > 1:
self.tournaments[tournament_id].name = name[0]
- session = Session(link.href, session_group, session_number, name[-1])
- session.tournament = self.tournaments[tournament_id]
+ session = Session(self.tournaments[tournament_id], link['href'], session_group, session_number, name[-1])
self.tournaments[tournament_id].sessions.append(session)
def __repr__(self):
@@ -50,9 +52,13 @@ class Tournament(object):
id = None
name = None
sessions = None
+ pairs = None
+ event = None
- def __init__(self):
+ def __init__(self, event):
self.sessions = []
+ self.pairs = {}
+ self.event = event
def __repr__(self):
return '%s (#%d)' % (self.name, self.id)
@@ -63,19 +69,55 @@ class Session(object):
group_number = None
round_number = None
name = None
+ results = None
- def __init__(self, link, group_no, round_no, name):
- self.link = link.replace('/TotalPairs', '/RoundPairs')
+ def __init__(self, tournament, link, group_no, round_no, name):
+ self.tournament = tournament
+ self.link = urljoin(self.tournament.event.link, link.replace('/TotalPairs', '/RoundPairs'))
self.group_number = group_no
self.round_number = round_no
self.name = name
+ self.results = bs(fetch_url(self.link), 'lxml')
+ self.pair_link_regex = re.compile(
+ r'boarddetailspairs\.asp\?qtournid=%d&qgroupno=%d&qroundno=%d&qpairid=(\d+)$' % (
+ self.tournament.id, self.group_number, self.round_number
+ ),
+ flags=re.I)
+ self.get_pairs()
+
+ def get_pairs(self):
+ for row in self.results.select('tr tr'):
+ for link in row.select('a[href]'):
+ pair_link = self.pair_link_regex.search(link['href'])
+ if pair_link:
+ pair_number = int(pair_link.group(1))
+ if pair_number not in self.tournament.pairs:
+ names = [a.text for a in row.select('a[href]') if 'person.asp' in a['href']]
+ nationalities = row.select('td')[-2].text
+ pair = Pair(pair_number, names, nationalities, self.tournament)
+ self.tournament.pairs[pair_number] = pair
def __repr__(self):
return '%s (#%d/%d/%d)' % (self.name, self.tournament.id, self.group_number, self.round_number)
+class Pair(object):
+ number = None
+ names = None
+ tournament = None
+ nationalities = None
+
+ def __init__(self, number, names, nationalities, tournament):
+ self.number = number
+ self.names = names
+ self.tournament = tournament
+ self.nationalities = nationalities
+
+ def __repr__(self):
+ return '#%d %s (%s)' % (self.number, ' - '.join(self.names), self.nationalities)
+
results_url = sys.argv[1]
event = Event(results_url)
print event
for tournament in event.tournaments.values():
- print tournament, tournament.sessions
+ print tournament, tournament.sessions, tournament.pairs