diff options
author | emkael <emkael@tlen.pl> | 2016-02-19 13:30:20 +0100 |
---|---|---|
committer | emkael <emkael@tlen.pl> | 2016-02-19 13:55:06 +0100 |
commit | 7a384588d8cf273ec078d3370eebc4087eb92d75 (patch) | |
tree | 5da331075a837d28753c9cd6517ca45fad9035d0 | |
parent | 53078b3f387216b21bf8ab27636511f4373dc9f6 (diff) |
* calendar import script (from reddit markdown)
-rw-r--r-- | conf/reddit-import.json | 6 | ||||
-rw-r--r-- | import_cals.py | 67 | ||||
-rw-r--r-- | init_db.py | 6 |
3 files changed, 76 insertions, 3 deletions
```diff
diff --git a/conf/reddit-import.json b/conf/reddit-import.json
new file mode 100644
index 0000000..4101f80
--- /dev/null
+++ b/conf/reddit-import.json
@@ -0,0 +1,6 @@
+{
+    "cache": "cache/list.md",
+    "cache_time": 86400,
+    "reddit_url": "https://www.reddit.com/r/MotorsportsCalendar/comments/45uv93/2016_calendar_list.json",
+    "user_agent": "/u/emkael"
+}
diff --git a/import_cals.py b/import_cals.py
new file mode 100644
index 0000000..09c3272
--- /dev/null
+++ b/import_cals.py
@@ -0,0 +1,67 @@
+import json
+import os
+import re
+import time
+import urllib2
+import urlparse
+
+from rcal.db import Session
+from rcal.model import Calendar, Category
+
+CONFIG = json.load(open(
+    os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        'conf',
+        'reddit-import.json'),
+    'r'))
+
+
+def main():
+    cal_list = None
+
+    if not os.path.exists(CONFIG['cache']) or \
+            int(time.time()) - int(os.path.getmtime(CONFIG['cache'])) > \
+            CONFIG['cache_time']:
+        opener = urllib2.build_opener()
+        opener.addheaders = [('User-Agent', CONFIG['user_agent'])]
+        cal_list = json.loads(opener.open(CONFIG['reddit_url']).read())
+        cal_list = cal_list[0]['data']['children'][0]['data']['selftext']
+        with open(CONFIG['cache'], 'w') as cache_file:
+            cache_file.write(cal_list)
+            cache_file.close()
+    else:
+        cal_list = open(CONFIG['cache'], 'r').read()
+
+    session = Session.create()
+
+    ical_markdown = re.compile(r'^\[iCal\]\((.*)\)$')
+
+    cells = [row.split('|') for row in cal_list.split('\n')]
+    for row in cells:
+        if len(row) == 7:
+            row = [r for r in row if r]
+            if len(row) == 5:
+                markdown_match = re.match(ical_markdown, row[2])
+                if markdown_match:
+                    ical_url = urlparse.urlparse(markdown_match.group(1))
+                    if ical_url.netloc == 'calendar.google.com':
+                        ical_path = re.sub(
+                            '^/?calendar/ical/', '', ical_url.path).split('/')
+                        if len(ical_path) == 3:
+                            calendar_uid = ical_path[0]
+                            calendar = Calendar.fetch(
+                                calendar_uid,
+                                session,
+                                row[0],
+                                ical_url.geturl())
+                            calendar.website = row[4]
+                            calendar.category = Category.fetch(row[1], session)
+                        else:
+                            print ical_url.geturl()
+                    else:
+                        print ical_url.geturl()
+
+    session.commit()
+
+if __name__ == '__main__':
+    main()
diff --git a/init_db.py b/init_db.py
--- a/init_db.py
+++ b/init_db.py
@@ -1,16 +1,16 @@
 import sys
 
 from rcal.db import Session
-from rcal.model import Base
+from rcal.model import BASE
 
 
 def main():
     session = Session.create()
 
     if len(sys.argv) > 1 and sys.argv[1] == 'force':
-        Base.metadata.drop_all(session.get_bind())
+        BASE.metadata.drop_all(session.get_bind())
 
-    Base.metadata.create_all(session.get_bind())
+    BASE.metadata.create_all(session.get_bind())
 
 if __name__ == '__main__':
     main()
```