import json import os import re import time import urllib2 import urlparse from slugify import slugify from sqlalchemy import inspect from rcal.db import Session from rcal.model import Calendar, Category BASEPATH = os.path.join( os.environ['PYTHONPATH'], '..', '..') CONFIG = json.load(open( os.path.join( os.environ['PYTHONPATH'], 'config', 'reddit-import.json'), 'r')) def get_cal_list(): cache_path = os.path.join( BASEPATH, CONFIG['cache']) if not os.path.exists(cache_path) or \ int(time.time()) - int(os.path.getmtime(cache_path)) > \ CONFIG['cache_time']: opener = urllib2.build_opener() opener.addheaders = [('User-Agent', CONFIG['user_agent'])] cal_list = json.loads(opener.open(CONFIG['reddit_url']).read()) cal_list = cal_list['data']['content_md'] with open(cache_path, 'w') as cache_file: cache_file.write(cal_list) cache_file.close() else: cal_list = open(cache_path, 'r').read() return cal_list def update_calendar(cal, session): db_cal = Calendar.fetch(cal['uid'], session) if inspect(db_cal).pending: print 'Adding calendar %s (%s)' % (cal['name'], cal['uid']) db_cal.name = cal['name'] db_cal.custom_url = slugify(cal['name']) db_cal.url = cal['url'] db_cal.website = cal['website'] db_cal.category = Category.fetch(cal['category'], session) if db_cal.name != cal['name']: print 'Updating calendar name: %s -> %s (%s)' % ( db_cal.name, cal['name'], db_cal.uid) db_cal.name = cal['name'] if db_cal.url != cal['url']: print 'Updating calendar url: %s -> %s (%s)' % ( db_cal.url, cal['url'], db_cal.uid) db_cal.url = cal['url'] if db_cal.website != cal['website']: print 'Updating calendar website: %s -> %s (%s)' % ( db_cal.website, cal['website'], db_cal.uid) db_cal.website = cal['website'] if 'category_mapping' in CONFIG and \ cal['category'] in CONFIG['category_mapping']: cal['category'] = CONFIG['category_mapping'][cal['category']] # informational only if db_cal.category.name != cal['category']: print 'Calendar category changed: %s -> %s (%s, %s)' % ( db_cal.category.name, cal['category'], db_cal.name, db_cal.uid) def get_imported_calendars(cells, ical_markdown): imported_calendars = [] for row in cells: row = [r for r in row if r.strip()] if len(row) == 5: markdown_match = re.match(ical_markdown, row[2]) if markdown_match: ical_url = urlparse.urlparse(markdown_match.group(1)) if ical_url.netloc == 'calendar.google.com': ical_path = re.sub( '^/?calendar/ical/', '', ical_url.path).split('/') if len(ical_path) == 3: imported_calendars.append({ 'uid': ical_path[0], 'url': ical_url.geturl(), 'name': row[0], 'website': row[4].split()[0], 'category': row[1] }) else: print 'Unknown iCal URL format: %s' % ( ical_url.geturl()) else: print 'Unknown iCal URL format: %s' % ( ical_url.geturl()) return imported_calendars def main(): session = Session.create() cal_list = get_cal_list() ical_markdown = re.compile(r'^\[iCal\]\((.*)\)$') cells = [row.split('|') for row in cal_list.split('\n')] imported_calendars = get_imported_calendars(cells, ical_markdown) imported_calendar_uids = [c['uid'] for c in imported_calendars] db_only_calendars = session.query(Calendar).filter( ~Calendar.uid.in_(imported_calendar_uids)).all() if len(db_only_calendars): print 'Local calendars not in remote source:' for cal in db_only_calendars: print '%s (%s)' % (cal.name, cal.uid) print for cal in imported_calendars: update_calendar(cal, session) session.commit() if __name__ == '__main__': main()