From 0c6e3ee216b30acfca4e3b0fab0b085b8e34960b Mon Sep 17 00:00:00 2001 From: emkael Date: Tue, 3 May 2016 21:35:29 +0200 Subject: * Python scripts moved --- app/python/fetch_cals.py | 93 +++++++++++++++++++++++++++++++++ app/python/import_cals.py | 128 ++++++++++++++++++++++++++++++++++++++++++++++ app/python/init_db.py | 16 ++++++ bin/fetch_cals.py | 93 --------------------------------- bin/import_cals.py | 124 -------------------------------------------- bin/init_db.py | 16 ------ bin/pyrun.sh | 2 +- config/reddit-import.json | 2 +- 8 files changed, 239 insertions(+), 235 deletions(-) create mode 100644 app/python/fetch_cals.py create mode 100644 app/python/import_cals.py create mode 100644 app/python/init_db.py delete mode 100644 bin/fetch_cals.py delete mode 100644 bin/import_cals.py delete mode 100644 bin/init_db.py diff --git a/app/python/fetch_cals.py b/app/python/fetch_cals.py new file mode 100644 index 0000000..30e7abc --- /dev/null +++ b/app/python/fetch_cals.py @@ -0,0 +1,93 @@ +import datetime + +import dateutil.parser as dateparser +import ics +import pytz +import requests +from rcal.db import Session +from rcal.model import Calendar, Entry + + +def update_event_data(db_event, ical_event): + db_event.name = ical_event.name + db_event.location = ical_event.location + db_event.begin_date = ical_event.begin.datetime + db_event.end_date = ical_event.end.datetime + db_event.all_day = ( + (db_event.end_date - db_event.begin_date).seconds % 86400 == 0 + ) and ( + db_event.begin_date.time() == datetime.time.min) + if db_event.all_day: + db_event.end_date = db_event.end_date - datetime.timedelta(days=1) + db_event.last_modified = get_last_modification_time(ical_event) + return db_event + + +def update_event(db_event, ical_event): + update_event_data(db_event, ical_event) + + +def add_event(event, calendar, session): + entry = Entry() + entry.uid = event.uid + entry.calendar = calendar + entry = update_event_data(entry, event) + session.add(entry) + + +def remove_event(event, session): + session.delete(event) + + +def get_last_modification_time(event): + for unused in event.__dict__['_unused']: + if unused.name == 'LAST-MODIFIED': + return dateparser.parse(unused.value) + return None + + +def fetch_calendar(calendar, session): + cal_data = requests.get(calendar.url) + cal_object = ics.Calendar(cal_data.content.decode(cal_data.encoding)) + cal_events = {e.uid: e for e in cal_object.events} + db_events = {e.uid: e for e in calendar.entries} + new_events = [e for u, e in cal_events.iteritems() + if u not in db_events.keys()] + old_events = [e for u, e in db_events.iteritems() + if u not in cal_events.keys()] + mod_events = [{'ics': cal_events[u], 'db': e} + for u, e in db_events.iteritems() if u in cal_events.keys()] + changes_present = False + for event in mod_events: + modified_date = get_last_modification_time(event['ics']) + if not modified_date or \ + not event['db'].last_modified or \ + modified_date > event['db'].last_modified.replace(tzinfo=pytz.UTC): + print 'Updating event %s' % event['db'].uid + update_event(event['db'], event['ics']) + changes_present = True + for event in new_events: + print 'Adding event %s' % event.uid + add_event(event, calendar, session) + changes_present = True + for event in old_events: + print 'Removing event %s' % event.uid + remove_event(event, session) + changes_present = True + if changes_present: + calendar.last_updated = datetime.datetime.now() + + +def main(): + session = Session.create() + + calendars = session.query(Calendar).all() + for calendar in calendars: + # print 'Fetching %s' % calendar.url + fetch_calendar(calendar, session) + + session.commit() + + +if __name__ == '__main__': + main() diff --git a/app/python/import_cals.py b/app/python/import_cals.py new file mode 100644 index 0000000..a08bfb2 --- /dev/null +++ b/app/python/import_cals.py @@ -0,0 +1,128 @@ +import json +import os +import re +import time +import urllib2 +import urlparse + +from sqlalchemy import inspect + +from rcal.db import Session +from rcal.model import Calendar, Category + +BASEPATH = os.path.join( + os.environ['PYTHONPATH'], + '..', + '..') + +CONFIG = json.load(open( + os.path.join( + BASEPATH, + 'config', + 'reddit-import.json'), + 'r')) + + +def get_cal_list(): + cache_path = os.path.join( + BASEPATH, + CONFIG['cache']) + if not os.path.exists(cache_path) or \ + int(time.time()) - int(os.path.getmtime(cache_path)) > \ + CONFIG['cache_time']: + opener = urllib2.build_opener() + opener.addheaders = [('User-Agent', CONFIG['user_agent'])] + cal_list = json.loads(opener.open(CONFIG['reddit_url']).read()) + cal_list = cal_list['data']['content_md'] + with open(cache_path, 'w') as cache_file: + cache_file.write(cal_list) + cache_file.close() + else: + cal_list = open(cache_path, 'r').read() + return cal_list + + +def update_calendar(cal, session): + db_cal = Calendar.fetch(cal['uid'], session) + + if inspect(db_cal).pending: + print 'Adding calendar %s (%s)' % (cal['name'], cal['uid']) + db_cal.name = cal['name'] + db_cal.url = cal['url'] + db_cal.website = cal['website'] + db_cal.category = Category.fetch(cal['category'], session) + + if db_cal.name != cal['name']: + print 'Updating calendar name: %s -> %s (%s)' % ( + db_cal.name, cal['name'], db_cal.uid) + db_cal.name = cal['name'] + if db_cal.url != cal['url']: + print 'Updating calendar url: %s -> %s (%s)' % ( + db_cal.url, cal['url'], db_cal.uid) + db_cal.url = cal['url'] + if db_cal.website != cal['website']: + print 'Updating calendar website: %s -> %s (%s)' % ( + db_cal.website, cal['website'], db_cal.uid) + db_cal.website = cal['website'] + + # informational only + if db_cal.category.name != cal['category']: + print 'Calendar category changed: %s -> %s (%s)' % ( + db_cal.category.name, cal['category'], db_cal.uid) + + +def get_imported_calendars(cells, ical_markdown): + imported_calendars = [] + for row in cells: + row = [r for r in row if r.strip()] + if len(row) == 5: + markdown_match = re.match(ical_markdown, row[2]) + if markdown_match: + ical_url = urlparse.urlparse(markdown_match.group(1)) + if ical_url.netloc == 'calendar.google.com': + ical_path = re.sub( + '^/?calendar/ical/', '', ical_url.path).split('/') + if len(ical_path) == 3: + imported_calendars.append({ + 'uid': ical_path[0], + 'url': ical_url.geturl(), + 'name': row[0], + 'website': row[4].split()[0], + 'category': row[1] + }) + else: + print 'Unknown iCal URL format: %s' % ( + ical_url.geturl()) + else: + print 'Unknown iCal URL format: %s' % ( + ical_url.geturl()) + return imported_calendars + + +def main(): + session = Session.create() + + cal_list = get_cal_list() + + ical_markdown = re.compile(r'^\[iCal\]\((.*)\)$') + cells = [row.split('|') for row in cal_list.split('\n')] + + imported_calendars = get_imported_calendars(cells, ical_markdown) + imported_calendar_uids = [c['uid'] for c in imported_calendars] + + db_only_calendars = session.query(Calendar).filter( + ~Calendar.uid.in_(imported_calendar_uids)).all() + + if len(db_only_calendars): + print 'Local calendars not in remote source:' + for cal in db_only_calendars: + print '%s (%s)' % (cal.name, cal.uid) + print + + for cal in imported_calendars: + update_calendar(cal, session) + + session.commit() + +if __name__ == '__main__': + main() diff --git a/app/python/init_db.py b/app/python/init_db.py new file mode 100644 index 0000000..7d48e80 --- /dev/null +++ b/app/python/init_db.py @@ -0,0 +1,16 @@ +import sys + +from rcal.db import Session +from rcal.model import BASE + + +def main(): + session = Session.create() + + if len(sys.argv) > 1 and sys.argv[1] == 'force': + BASE.metadata.drop_all(session.get_bind()) + + BASE.metadata.create_all(session.get_bind()) + +if __name__ == '__main__': + main() diff --git a/bin/fetch_cals.py b/bin/fetch_cals.py deleted file mode 100644 index 30e7abc..0000000 --- a/bin/fetch_cals.py +++ /dev/null @@ -1,93 +0,0 @@ -import datetime - -import dateutil.parser as dateparser -import ics -import pytz -import requests -from rcal.db import Session -from rcal.model import Calendar, Entry - - -def update_event_data(db_event, ical_event): - db_event.name = ical_event.name - db_event.location = ical_event.location - db_event.begin_date = ical_event.begin.datetime - db_event.end_date = ical_event.end.datetime - db_event.all_day = ( - (db_event.end_date - db_event.begin_date).seconds % 86400 == 0 - ) and ( - db_event.begin_date.time() == datetime.time.min) - if db_event.all_day: - db_event.end_date = db_event.end_date - datetime.timedelta(days=1) - db_event.last_modified = get_last_modification_time(ical_event) - return db_event - - -def update_event(db_event, ical_event): - update_event_data(db_event, ical_event) - - -def add_event(event, calendar, session): - entry = Entry() - entry.uid = event.uid - entry.calendar = calendar - entry = update_event_data(entry, event) - session.add(entry) - - -def remove_event(event, session): - session.delete(event) - - -def get_last_modification_time(event): - for unused in event.__dict__['_unused']: - if unused.name == 'LAST-MODIFIED': - return dateparser.parse(unused.value) - return None - - -def fetch_calendar(calendar, session): - cal_data = requests.get(calendar.url) - cal_object = ics.Calendar(cal_data.content.decode(cal_data.encoding)) - cal_events = {e.uid: e for e in cal_object.events} - db_events = {e.uid: e for e in calendar.entries} - new_events = [e for u, e in cal_events.iteritems() - if u not in db_events.keys()] - old_events = [e for u, e in db_events.iteritems() - if u not in cal_events.keys()] - mod_events = [{'ics': cal_events[u], 'db': e} - for u, e in db_events.iteritems() if u in cal_events.keys()] - changes_present = False - for event in mod_events: - modified_date = get_last_modification_time(event['ics']) - if not modified_date or \ - not event['db'].last_modified or \ - modified_date > event['db'].last_modified.replace(tzinfo=pytz.UTC): - print 'Updating event %s' % event['db'].uid - update_event(event['db'], event['ics']) - changes_present = True - for event in new_events: - print 'Adding event %s' % event.uid - add_event(event, calendar, session) - changes_present = True - for event in old_events: - print 'Removing event %s' % event.uid - remove_event(event, session) - changes_present = True - if changes_present: - calendar.last_updated = datetime.datetime.now() - - -def main(): - session = Session.create() - - calendars = session.query(Calendar).all() - for calendar in calendars: - # print 'Fetching %s' % calendar.url - fetch_calendar(calendar, session) - - session.commit() - - -if __name__ == '__main__': - main() diff --git a/bin/import_cals.py b/bin/import_cals.py deleted file mode 100644 index 07b8967..0000000 --- a/bin/import_cals.py +++ /dev/null @@ -1,124 +0,0 @@ -import json -import os -import re -import time -import urllib2 -import urlparse - -from sqlalchemy import inspect - -from rcal.db import Session -from rcal.model import Calendar, Category - -CONFIG = json.load(open( - os.path.join( - os.path.dirname(os.path.realpath(__file__)), - '..', - 'config', - 'reddit-import.json'), - 'r')) - - -def get_cal_list(): - cache_path = os.path.join( - os.path.dirname(os.path.realpath(__file__)), - CONFIG['cache']) - if not os.path.exists(cache_path) or \ - int(time.time()) - int(os.path.getmtime(cache_path)) > \ - CONFIG['cache_time']: - opener = urllib2.build_opener() - opener.addheaders = [('User-Agent', CONFIG['user_agent'])] - cal_list = json.loads(opener.open(CONFIG['reddit_url']).read()) - cal_list = cal_list['data']['content_md'] - with open(cache_path, 'w') as cache_file: - cache_file.write(cal_list) - cache_file.close() - else: - cal_list = open(cache_path, 'r').read() - return cal_list - - -def update_calendar(cal, session): - db_cal = Calendar.fetch(cal['uid'], session) - - if inspect(db_cal).pending: - print 'Adding calendar %s (%s)' % (cal['name'], cal['uid']) - db_cal.name = cal['name'] - db_cal.url = cal['url'] - db_cal.website = cal['website'] - db_cal.category = Category.fetch(cal['category'], session) - - if db_cal.name != cal['name']: - print 'Updating calendar name: %s -> %s (%s)' % ( - db_cal.name, cal['name'], db_cal.uid) - db_cal.name = cal['name'] - if db_cal.url != cal['url']: - print 'Updating calendar url: %s -> %s (%s)' % ( - db_cal.url, cal['url'], db_cal.uid) - db_cal.url = cal['url'] - if db_cal.website != cal['website']: - print 'Updating calendar website: %s -> %s (%s)' % ( - db_cal.website, cal['website'], db_cal.uid) - db_cal.website = cal['website'] - - # informational only - if db_cal.category.name != cal['category']: - print 'Calendar category changed: %s -> %s (%s)' % ( - db_cal.category.name, cal['category'], db_cal.uid) - - -def get_imported_calendars(cells, ical_markdown): - imported_calendars = [] - for row in cells: - row = [r for r in row if r.strip()] - if len(row) == 5: - markdown_match = re.match(ical_markdown, row[2]) - if markdown_match: - ical_url = urlparse.urlparse(markdown_match.group(1)) - if ical_url.netloc == 'calendar.google.com': - ical_path = re.sub( - '^/?calendar/ical/', '', ical_url.path).split('/') - if len(ical_path) == 3: - imported_calendars.append({ - 'uid': ical_path[0], - 'url': ical_url.geturl(), - 'name': row[0], - 'website': row[4].split()[0], - 'category': row[1] - }) - else: - print 'Unknown iCal URL format: %s' % ( - ical_url.geturl()) - else: - print 'Unknown iCal URL format: %s' % ( - ical_url.geturl()) - return imported_calendars - - -def main(): - session = Session.create() - - cal_list = get_cal_list() - - ical_markdown = re.compile(r'^\[iCal\]\((.*)\)$') - cells = [row.split('|') for row in cal_list.split('\n')] - - imported_calendars = get_imported_calendars(cells, ical_markdown) - imported_calendar_uids = [c['uid'] for c in imported_calendars] - - db_only_calendars = session.query(Calendar).filter( - ~Calendar.uid.in_(imported_calendar_uids)).all() - - if len(db_only_calendars): - print 'Local calendars not in remote source:' - for cal in db_only_calendars: - print '%s (%s)' % (cal.name, cal.uid) - print - - for cal in imported_calendars: - update_calendar(cal, session) - - session.commit() - -if __name__ == '__main__': - main() diff --git a/bin/init_db.py b/bin/init_db.py deleted file mode 100644 index 7d48e80..0000000 --- a/bin/init_db.py +++ /dev/null @@ -1,16 +0,0 @@ -import sys - -from rcal.db import Session -from rcal.model import BASE - - -def main(): - session = Session.create() - - if len(sys.argv) > 1 and sys.argv[1] == 'force': - BASE.metadata.drop_all(session.get_bind()) - - BASE.metadata.create_all(session.get_bind()) - -if __name__ == '__main__': - main() diff --git a/bin/pyrun.sh b/bin/pyrun.sh index 36a3339..69c63b0 100755 --- a/bin/pyrun.sh +++ b/bin/pyrun.sh @@ -3,4 +3,4 @@ DIR=$(dirname $0) SCRIPT=$(echo $1 | sed 's/\.py\$//') shift export PYTHONPATH=$DIR/../app/python/ -python $DIR/$SCRIPT.py $@ +python $PYTHONPATH/$SCRIPT.py $@ diff --git a/config/reddit-import.json b/config/reddit-import.json index 0016703..f15bd77 100644 --- a/config/reddit-import.json +++ b/config/reddit-import.json @@ -1,5 +1,5 @@ { - "cache": "../cache/list.md", + "cache": "cache/list.md", "cache_time": 86400, "reddit_url": "https://www.reddit.com/r/MotorsportsCalendar/wiki/index.json", "user_agent": "/u/emkael" -- cgit v1.2.3