#!/usr/bin/python2.4
# -*- coding: utf-8 -*-

'''
convert-academic-calendar.py
============================

This is a quick-'n'-dirty script that converts the Rose-Hulman 
Institute of Technology's academic calendar page to iCalendar 
format so that the data are more useful.

:Author:    Tom Most
:Version:   1.0
:Created:   20 June 2007
:Modified:  20 June 2007
'''

import os
import sys
import re
import datetime
import urllib

from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
from icalendar import Calendar, Event, UTC

# Name of this script file; used in the calendar PRODID and in event UIDs
APPNAME = os.path.basename(__file__)

# Page to scrape and the iCalendar file to produce
PAGE_URL = 'http://www.rose-hulman.edu/Users/groups/AcademicAffairs/2007-2008.html'
OUTFILE = 'rhit-academic-calendar-07-08.ics'

# The academic year straddles two calendar years: months named in
# MONTHS_2007 are dated 2007, every other month is dated 2008.
MONTHS_2007 = 'August September October November December'.split()
MONTHS_2008 = 'January February March April May'.split()

# All twelve month names in calendar order, so that
# MONTHS.index(name) + 1 is the month number.
MONTHS = MONTHS_2008 + ['June', 'July'] + MONTHS_2007

# Matches a time of day such as "8:05 a.m." or "5:00pm", together with
# an optional " - " separator in front of it.  Written as a raw string
# so the regex escapes are not treated as (invalid) string escapes.
TIME_RE = re.compile(r'(\s+-\s+)?(?P<hour>\d?\d):(?P<minute>\d{2})\s*(?P<period>[ap])\.?m\.?')

# Download the calendar page.  The try/finally makes sure the
# connection is closed even if the read fails part-way through.
socket = urllib.urlopen(PAGE_URL)
try:
	html = socket.read()
finally:
	socket.close()

# Parse the page, asking BeautifulSoup to convert HTML entities
soup = BeautifulSoup(html, convertEntities=BeautifulStoneSoup.HTML_ENTITIES)


def text_content(node):
	'''Return the text found inside node, with all of the
	individual fragments concatenated into one unicode string.'''
	fragments = node.findAll(text=True)
	return u''.join(fragments)


def utc_date(year, month, day, hour=None, minute=None):
	'''Convert a naive local date (and optional time) to UTC.
	
	When both hour and minute are given, the local wall-clock time is
	shifted by the local UTC offset (-4 hours when is_dst() says the
	day is in DST, -5 hours otherwise) and returned as a
	datetime.datetime tagged with UTC.
	
	When either is omitted the value is an all-day date.  It has no
	time of day to shift, so a plain datetime.date for the same day is
	returned unchanged.  (No extra day is added here for iCalendar's
	exclusive DTEND; that is up to the caller.)'''
	
	if hour is None or minute is None:
		# Do NOT apply the offset to a bare date: date arithmetic only
		# looks at timedelta.days, and timedelta(hours=-4) normalizes to
		# days=-1, so "date - offset" would silently move the event to
		# the following day.
		return datetime.date(year, month, day)
	
	# The wall-clock time is labelled UTC up front and then corrected
	local = datetime.datetime(year, month, day, hour, minute, tzinfo=UTC)
	
	# Find offset from UTC
	if is_dst(year, month, day):
		offset = -4
	else:
		offset = -5
	
	# Remove the offset to get a UTC time
	return local - datetime.timedelta(hours=offset)


# U.S. daylight saving time for the two calendar years this script
# covers: (first day of DST, first day back on standard time).
_DST_PERIODS = {
	2007: (datetime.date(2007, 3, 11), datetime.date(2007, 11, 4)),
	2008: (datetime.date(2008, 3, 9), datetime.date(2008, 11, 2)),
}


def is_dst(year, month, day):
	'''Test whether the specified date is subject to
	DST adjustment.
	
	Works at whole-day granularity.  The clocks change at 2 a.m., so
	the day DST begins counts as DST and the day it ends counts as
	standard time; that is right for anything scheduled after 2 a.m.
	
	Only 2007 and 2008 are known; any other year returns False.
	Raises ValueError if year/month/day is not a valid date.'''
	date = datetime.date(year, month, day)
	if year not in _DST_PERIODS:
		return False
	begins, ends = _DST_PERIODS[year]
	return begins <= date < ends


cal = Calendar()
for prop, value in (
		('prodid', '-//%s/freecog.net/' % APPNAME),
		('version', '2.0'),
		('calscale', 'GREGORIAN')):
	cal.add(prop, value)

# Display name shown by Google Calendar
cal['X-WR-CALNAME'] = 'RHIT Academic Calendar'
# Outlook 2003 chokes without a METHOD (see the iCalendar page at Wikipedia)
cal['METHOD'] = 'PUBLISH'

# Not sure what this property is for, but Google Calendar adds it
# (with strange data) when the events are re-exported.
cal['x-wr-timezone'] = 'America/Indianapolis'


# Extract data from the tables.  A data row has exactly three cells:
# the first holds "<month name> <day>[-<day>]" and the third holds the
# event description(s); the middle cell is not used.
rows = soup.findAll("tr")
for rowcount, row in enumerate(rows):
	cells = row.findAll("td", recursive=False)
	
	# Skip non-data rows
	if len(cells) != 3:
		continue
	
	month_name, dayrange = text_content(cells[0]).split(None, 1)
	month = MONTHS.index(month_name) + 1
	if month_name in MONTHS_2007:
		year = 2007
	else:
		year = 2008
	
	# The day (or first-last day range) applies to every event in the row
	days = dayrange.split('-')
	start_day = int(days[0])
	
	end_date = None
	if len(days) > 1:
		end_day = int(days[1])
		# An all-day end has no time zone to convert, and iCalendar
		# treats DTEND as exclusive, so the range has to stop on the day
		# after the last listed day.
		end_date = datetime.date(year, month, end_day) + \
				datetime.timedelta(days=1)
	
	details = text_content(cells[2]).strip()
	
	# One row may list several events joined by " & "
	for subcount, detail in enumerate(details.split(' & ')):
		event = Event()
		time_match = TIME_RE.search(detail)
		summary = TIME_RE.sub('', detail).strip()
		summary = ' '.join(summary.split()) # Clean up whitespace
		summary = summary.replace(u' - ', u' — ') # Prettier
		event.add('summary', summary)
		
		event.add('status', 'CONFIRMED')
		
		# Prevent Outlook 2003 from choking (see iCalendar page at Wikipedia)
		event.add('dtstamp', datetime.datetime.now())
		
		if end_date is not None:
			event.add('dtend', end_date)
		
		if time_match:
			# 12-hour to 24-hour: 12 a.m. is hour 0 and 12 p.m. is hour
			# 12, so fold 12 down to 0 before adding the p.m. offset.
			hour = int(time_match.group('hour')) % 12
			minute = int(time_match.group('minute'))
			if time_match.group('period') == 'p': # p.m.
				hour += 12
		else:
			# No time given: all-day event
			hour, minute = None, None
		
		start_date = utc_date(year, month, start_day, hour, minute)
		event.add('dtstart', start_date)
		# rowcount and subcount keep the UID unique when several events
		# share a date
		event.add('uid', '%s-%s-%s.%s.%s-%s@freecog.net' % (
			rowcount, subcount, year, month, dayrange, APPNAME))
		cal.add_component(event)

# Serialize before opening the file: opening with 'wb' truncates it, so
# a failure while building the iCalendar text must not be allowed to
# wipe out an existing output file.
ics_data = cal.as_string()

fout = open(OUTFILE, 'wb')
try:
	fout.write(ics_data)
finally:
	fout.close()