summary refs log tree commit diff
diff options
context:
space:
mode:
author emkael <emkael@tlen.pl> 2016-02-19 13:30:20 +0100
committer emkael <emkael@tlen.pl> 2016-02-19 13:55:06 +0100
commit 7a384588d8cf273ec078d3370eebc4087eb92d75 (patch)
tree 5da331075a837d28753c9cd6517ca45fad9035d0
parent 53078b3f387216b21bf8ab27636511f4373dc9f6 (diff)
* calendar import script (from reddit markdown)
-rw-r--r-- conf/reddit-import.json 6
-rw-r--r-- import_cals.py 67
-rw-r--r-- init_db.py 6
3 files changed, 76 insertions, 3 deletions
diff --git a/conf/reddit-import.json b/conf/reddit-import.json
new file mode 100644
index 0000000..4101f80
--- /dev/null
+++ b/conf/reddit-import.json
@@ -0,0 +1,6 @@
+{
+ "cache": "cache/list.md",
+ "cache_time": 86400,
+ "reddit_url": "https://www.reddit.com/r/MotorsportsCalendar/comments/45uv93/2016_calendar_list.json",
+ "user_agent": "/u/emkael"
+}
diff --git a/import_cals.py b/import_cals.py
new file mode 100644
index 0000000..09c3272
--- /dev/null
+++ b/import_cals.py
@@ -0,0 +1,67 @@
+import json
+import os
+import re
+import time
+import urllib2
+import urlparse
+
+from rcal.db import Session
+from rcal.model import Calendar, Category
+
+CONFIG = json.load(open(  # import settings, parsed once when the module is loaded
+ os.path.join(
+ os.path.dirname(os.path.realpath(__file__)),  # directory containing this script (symlinks resolved)
+ 'conf',
+ 'reddit-import.json'),
+ 'r'))  # NOTE(review): the file object is never closed explicitly
+
+
+def main():  # load the calendar list (cache file or reddit), parse its '|'-separated rows, then commit the session
+ cal_list = None
+
+ if not os.path.exists(CONFIG['cache']) or \
+ int(time.time()) - int(os.path.getmtime(CONFIG['cache'])) > \
+ CONFIG['cache_time']:  # true when the cache file is missing or its mtime is more than cache_time seconds old
+ opener = urllib2.build_opener()
+ opener.addheaders = [('User-Agent', CONFIG['user_agent'])]  # send the configured User-Agent with the request
+ cal_list = json.loads(opener.open(CONFIG['reddit_url']).read())
+ cal_list = cal_list[0]['data']['children'][0]['data']['selftext']  # selftext of the first child of the first element of the response
+ with open(CONFIG['cache'], 'w') as cache_file:
+ cache_file.write(cal_list)  # store the fetched text in the cache file
+ cache_file.close()  # NOTE(review): redundant if this line sits inside the `with` block, which already closes the file
+ else:
+ cal_list = open(CONFIG['cache'], 'r').read()  # NOTE(review): this file handle is never closed explicitly
+
+ session = Session.create()
+
+ ical_markdown = re.compile(r'^\[iCal\]\((.*)\)$')  # matches a cell of the form [iCal](<url>) and captures <url>
+
+ cells = [row.split('|') for row in cal_list.split('\n')]  # one list of '|'-separated cells per line of text
+ for row in cells:
+ if len(row) == 7:
+ row = [r for r in row if r]  # drop empty cells
+ if len(row) == 5:  # only rows with exactly 5 cells are processed further
+ markdown_match = re.match(ical_markdown, row[2])  # third cell is tested against the iCal link pattern
+ if markdown_match:
+ ical_url = urlparse.urlparse(markdown_match.group(1))
+ if ical_url.netloc == 'calendar.google.com':  # only URLs on this host are imported
+ ical_path = re.sub(
+ '^/?calendar/ical/', '', ical_url.path).split('/')  # strip the leading calendar/ical/ prefix, then split the rest of the path
+ if len(ical_path) == 3:
+ calendar_uid = ical_path[0]  # first remaining path segment is used as the calendar UID
+ calendar = Calendar.fetch(
+ calendar_uid,
+ session,
+ row[0],  # first cell; presumably the calendar name - confirm against Calendar.fetch
+ ical_url.geturl())
+ calendar.website = row[4]  # fifth cell
+ calendar.category = Category.fetch(row[1], session)  # second cell is passed to Category.fetch
+ else:
+ print ical_url.geturl()  # Python 2 print statement; NOTE(review): indentation is lost in this rendering, confirm which `if` this `else` pairs with
+ else:
+ print ical_url.geturl()  # prints the URL on this fallback branch as well
+
+ session.commit()  # NOTE(review): presumably runs once after the loop - indentation is lost here, confirm
+
+if __name__ == '__main__':  # run the import only when this file is executed as a script
+ main()
diff --git a/init_db.py b/init_db.py
index da3e641..7d48e80 100644
--- a/init_db.py
+++ b/init_db.py
@@ -1,16 +1,16 @@
import sys
from rcal.db import Session
-from rcal.model import Base
+from rcal.model import BASE
def main():  # calls drop_all/create_all on BASE.metadata using the session's bind
session = Session.create()
if len(sys.argv) > 1 and sys.argv[1] == 'force':  # checks whether the first command-line argument is 'force'
- Base.metadata.drop_all(session.get_bind())
+ BASE.metadata.drop_all(session.get_bind())  # this commit only renames Base to BASE; presumably guarded by the 'force' check - indentation is lost here
- Base.metadata.create_all(session.get_bind())
+ BASE.metadata.create_all(session.get_bind())  # same rename; presumably runs unconditionally - confirm against the real file
if __name__ == '__main__':
main()