"""Import iCal links from a Reddit self-post into the rcal database.

The post's ``selftext`` is fetched from ``CONFIG['reddit_url']`` (or read from
a local cache file when that file is recent enough), split into
pipe-delimited rows, and every row carrying a Google Calendar iCal link is
stored through ``Calendar.fetch``.
"""

import io
import json
import os
import re
import time
import urllib2
import urlparse

from rcal.db import Session
from rcal.model import Calendar, Category

# Directory containing this script; config and cache paths are relative to it.
_HERE = os.path.dirname(os.path.realpath(__file__))

# Loaded once at import time. The ``with`` block closes the handle promptly
# instead of leaving it to the garbage collector.
with open(os.path.join(_HERE, '..', 'config', 'reddit-import.json'),
          'r') as _config_file:
    CONFIG = json.load(_config_file)

# A table cell of the exact form ``[iCal](<url>)``; group 1 is the URL.
_ICAL_MARKDOWN = re.compile(r'^\[iCal\]\((.*)\)$')

# Leading ``calendar/ical/`` segment stripped from the URL path.
_ICAL_PATH_PREFIX = re.compile(r'^/?calendar/ical/')


def _cache_is_fresh(cache_path):
    """Return True if ``cache_path`` exists and is not older than
    ``CONFIG['cache_time']`` (compared in whole seconds)."""
    if not os.path.exists(cache_path):
        return False
    age = int(time.time()) - int(os.path.getmtime(cache_path))
    return age <= CONFIG['cache_time']


def _fetch_calendar_list():
    """Download the Reddit listing and return the post's ``selftext``.

    The request is sent with the ``User-Agent`` from ``CONFIG``.  The result
    is always returned as unicode text.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('User-Agent', CONFIG['user_agent'])]
    response = opener.open(CONFIG['reddit_url'])
    try:
        listing = json.loads(response.read())
    finally:
        # Release the connection even if the body is not valid JSON.
        response.close()
    text = listing[0]['data']['children'][0]['data']['selftext']
    if isinstance(text, bytes):
        # Defensive: ``io.open`` text files only accept unicode.
        text = text.decode('utf-8')
    return text


def _load_calendar_list():
    """Return the calendar table text, refreshing the cache when stale.

    The cache lives at ``CONFIG['cache']`` relative to this script.  It is
    read and written as UTF-8 with newline translation disabled so that a
    cached run sees exactly the same text as a fresh one.
    """
    cache_path = os.path.join(_HERE, CONFIG['cache'])
    if _cache_is_fresh(cache_path):
        with io.open(cache_path, 'r', encoding='utf-8',
                     newline='') as cache_file:
            return cache_file.read()

    cal_list = _fetch_calendar_list()
    # Explicit UTF-8: writing unicode through the built-in ``open`` on
    # Python 2 fails with UnicodeEncodeError on any non-ASCII character.
    with io.open(cache_path, 'w', encoding='utf-8', newline='') as cache_file:
        cache_file.write(cal_list)
    return cal_list


def _import_row(row, session):
    """Store the calendar described by one pipe-split table row, if any.

    ``row`` is the list produced by splitting a line on ``'|'``.  Only rows
    with exactly seven raw cells of which exactly five are non-empty are
    considered; all other rows are ignored silently.  Rows whose iCal link
    is not a ``calendar.google.com`` URL with a three-segment path after
    ``calendar/ical/`` have their URL printed and are skipped.
    """
    if len(row) != 7:
        return
    cells = [cell for cell in row if cell]
    if len(cells) != 5:
        return

    link = _ICAL_MARKDOWN.match(cells[2])
    if not link:
        return

    ical_url = urlparse.urlparse(link.group(1))
    ical_path = _ICAL_PATH_PREFIX.sub('', ical_url.path).split('/')
    if ical_url.netloc != 'calendar.google.com' or len(ical_path) != 3:
        # Report links that cannot be imported so they can be reviewed.
        print(ical_url.geturl())
        return

    # The first path segment is used as the calendar's unique id.
    # NOTE(review): cells[0] / cells[1] / cells[4] look like name, category
    # and website columns -- confirm against the Reddit table layout.
    calendar = Calendar.fetch(
        ical_path[0], session, cells[0], ical_url.geturl())
    calendar.website = cells[4]
    calendar.category = Category.fetch(cells[1], session)


def main():
    """Load the calendar table, import every usable row, and commit once."""
    cal_list = _load_calendar_list()
    session = Session.create()
    for line in cal_list.split('\n'):
        _import_row(line.split('|'), session)
    session.commit()


if __name__ == '__main__':
    main()