summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2019-02-13 12:22:04 +0300
committer emkael <emkael@tlen.pl> 2019-02-13 12:22:17 +0300
commit d4c80671bc76966b14d7ae3112d96ed8114cf7cc (patch)
tree 5d0ccd6a2b3f5ff237a8e37c05cd2c01c41f82d4
parent 2c7ba2c6a8879adf1b878951fed71f2e8cff62f5 (diff)
Fetching tournament/session list from event link
-rw-r--r-- fetcher.py 81
1 files changed, 81 insertions, 0 deletions
diff --git a/fetcher.py b/fetcher.py
new file mode 100644
index 0000000..3395941
--- /dev/null
+++ b/fetcher.py
@@ -0,0 +1,81 @@
+import urllib2
+from bs4 import BeautifulSoup as bs
+import sys, os, hashlib, re
+
def fetch_url(url):
    """Return the body of ``url``, served from the on-disk cache when possible.

    Responses are stored under ``cache/<sha224 hex digest of the URL>``,
    relative to the current working directory.  On a cache miss the URL is
    downloaded with ``urllib2.urlopen`` and the body is saved before it is
    returned.  Exceptions raised by the download or by the file operations
    propagate to the caller unchanged.

    :param url: address of the page to fetch
    :returns: the response body, byte-for-byte as downloaded
    """
    # hashlib needs bytes; encode text URLs explicitly so that non-ASCII
    # characters do not trip an implicit ASCII conversion.
    url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
    cache_path = os.path.join('cache', hashlib.sha224(url_bytes).hexdigest())

    if os.path.exists(cache_path):
        # Binary mode: hand back exactly the bytes that were stored.
        with open(cache_path, 'rb') as cached:
            return cached.read()

    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    # The cache directory is not guaranteed to exist on a first run.
    if not os.path.isdir('cache'):
        os.makedirs('cache')
    with open(cache_path, 'wb') as cached:
        cached.write(content)
    return content
+
class Event(object):
    """An event results page together with the tournaments it links to.

    Constructing an ``Event`` fetches ``link`` through ``fetch_url``, parses
    the page, takes the page title as the event name and collects every
    session link found on the page, grouped by tournament id.
    """

    link = None         # URL of the event results page
    name = None         # text of the page <title>, or the link if there is none
    tournaments = None  # dict mapping tournament id -> Tournament
    results = None      # parsed results page
    # Session links end in
    # /TotalPairs.asp?qtournid=<id>&qroundno=<round>&qgroupno=<group>
    session_link_regex = re.compile(r'/TotalPairs\.asp\?qtournid=(\d+)&qroundno=(\d+)&qgroupno=(\d+)$', flags=re.I)

    def __init__(self, link):
        """Fetch and parse the event page at ``link``, then index its sessions."""
        self.link = link
        self.tournaments = {}
        self.results = bs(fetch_url(self.link), 'lxml')
        title = self.results.find('title')
        # A page without <title> would otherwise fail on ``None.text``; the
        # link keeps ``__repr__`` returning a string in that case.
        self.name = title.text if title is not None else link
        self.get_tournaments()

    def get_tournaments(self):
        """Populate ``self.tournaments`` from the session links on the page.

        Every ``<a href>`` whose target matches ``session_link_regex`` becomes
        a ``Session`` attached to the ``Tournament`` carrying the id found in
        the link; a tournament is created the first time its id is seen.
        """
        for anchor in self.results.select('a[href]'):
            href = anchor['href']
            match = self.session_link_regex.search(href)
            if not match:
                continue
            tournament_id, round_number, group_number = (
                int(value) for value in match.groups())

            tournament = self.tournaments.get(tournament_id)
            if tournament is None:
                tournament = Tournament()
                tournament.id = tournament_id
                self.tournaments[tournament_id] = tournament

            words = anchor.text.split()
            if len(words) > 1:
                # The first word of a multi-word label is used as the
                # tournament name.
                tournament.name = words[0]
            # The last word of the label names the session; a link without
            # any text (``words`` empty) gets no name instead of IndexError.
            session_name = words[-1] if words else None

            # The attribute value must be read by subscription:
            # ``anchor.href`` searches for a child <href> tag and yields None.
            session = Session(href, group_number, round_number, session_name)
            session.tournament = tournament
            tournament.sessions.append(session)

    def __repr__(self):
        return self.name
+
class Tournament(object):
    """A tournament within an event: an id, a name and a list of sessions.

    ``id`` and ``name`` are not constructor arguments; whoever creates the
    instance assigns them afterwards.
    """

    id = None        # tournament id, assigned after construction
    name = None      # tournament name, assigned after construction
    sessions = None  # list of sessions; a fresh list per instance

    def __init__(self):
        self.sessions = []

    def __repr__(self):
        """Return ``'<name> (#<id>)'``; an unassigned id is shown as ``#?``."""
        if self.id is None:
            # ``%d`` cannot format None, and repr() should never raise.
            return '%s (#?)' % (self.name,)
        return '%s (#%d)' % (self.name, self.id)
+
class Session(object):
    """One session (round) of a tournament, identified by group and round.

    ``tournament`` is not a constructor argument; the creator assigns it
    after construction.
    """

    tournament = None    # owning tournament, assigned after construction
    link = None          # session URL, pointing at the RoundPairs page
    group_number = None
    round_number = None
    name = None

    def __init__(self, link, group_no, round_no, name):
        """Store the session data, rewriting ``link`` to the RoundPairs page.

        :param link: URL containing ``/TotalPairs``
        :param group_no: group number of the session
        :param round_no: round number of the session
        :param name: display name of the session
        """
        # Case-insensitive on purpose: Event.session_link_regex accepts the
        # link with re.I, so a lower-case '/totalpairs' must be rewritten too.
        self.link = re.sub(r'/TotalPairs', '/RoundPairs', link, flags=re.I)
        self.group_number = group_no
        self.round_number = round_no
        self.name = name

    def __repr__(self):
        """Return ``'<name> (#<tournament id>/<group>/<round>)'``.

        The tournament id is shown as ``?`` while no tournament (or no
        tournament id) has been assigned, so that repr() does not raise.
        """
        owner = self.tournament
        if owner is None or owner.id is None:
            tournament_ref = '?'
        else:
            tournament_ref = '%d' % owner.id
        return '%s (#%s/%d/%d)' % (
            self.name, tournament_ref, self.group_number, self.round_number)
+
# Script body: runs at module level, taking the event URL from the first
# command-line argument.
if len(sys.argv) < 2:
    # Without this check a missing argument surfaces as a bare IndexError.
    sys.exit('usage: %s EVENT_URL' % sys.argv[0])

results_url = sys.argv[1]
event = Event(results_url)

# Each print receives a single pre-formatted value, which produces the same
# output whether ``print`` is parsed as a statement or called as a function.
print(event)
for tournament in event.tournaments.values():
    print('%s %s' % (tournament, tournament.sessions))