#!/usr/bin/python
# -*- coding: utf-8 -*-
# usage: for i in *.html; do echo $i; python html2pt.py $i;done
import sys
import codecs
import re
from time import strftime, gmtime

def convert_text(text, date_string):
    """Return *text* rewritten for use as a ``.pt`` page.

    Three substitutions are applied, in this order:

    1. Relative links (``href="./..."``) that end in ``.graphml`` are
       pointed at the matching ``.pt`` file instead.
    2. Every ``<!-- insert date here -->`` placeholder is replaced by
       *date_string*.
    3. ``<area>`` elements whose ``href`` starts with ``http`` get a
       ``target="_blank"`` attribute so external links open in a new tab.

    *date_string* is handed to ``re.sub`` as the replacement template, so
    any backslash escapes in it are interpreted by ``re``.
    """
    text = re.sub(r'(href="\./.*?)\.graphml"', r'\1.pt"', text)
    text = re.sub('<!-- insert date here -->', date_string, text)
    # open external links in a new tab
    text = re.sub(
        r'(?P<firstBit><area.*?) href="http(?P<secondBit>.*?\/>)',
        r'\g<firstBit> target="_blank" href="http\g<secondBit>',
        text,
    )
    return text


def output_path_for(input_path):
    """Return the path the converted file is written to.

    Only the LAST occurrence of ``"html"`` in *input_path* is replaced by
    ``"pt"``, so a directory component such as ``html_files/`` is left
    alone (replacing every occurrence would redirect the output into a
    different directory).  If *input_path* does not contain ``"html"`` at
    all it is returned unchanged, i.e. the input file is overwritten in
    place.
    """
    head, found, tail = input_path.rpartition("html")
    if not found:
        return input_path
    return head + "pt" + tail


def main(argv=None):
    """Convert the file named by the last command-line argument.

    *argv* defaults to ``sys.argv``.  The input is read as UTF-8, passed
    through ``convert_text`` with a ``<meta name="date">`` tag carrying
    today's UTC date, and written as UTF-8 to ``output_path_for(input)``.
    Exits with a usage message when no file argument is given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Without this guard argv[-1] is the script itself, which would be
        # read and "converted" into a stray file.
        sys.exit("usage: python html2pt.py FILE.html")

    input_path = argv[-1]
    date_string = '<meta name="date" content="%s"/>' % strftime("%Y-%m-%d", gmtime())

    # Read everything before opening the output: when the path contains no
    # "html" the output path equals the input path.
    with codecs.open(input_path, 'r', "utf-8") as input_file:
        text = input_file.read()

    text = convert_text(text, date_string)

    # The context manager guarantees the output is flushed and closed.
    with codecs.open(output_path_for(input_path), 'w', 'utf-8') as output_file:
        output_file.write(text)


if __name__ == "__main__":
    main()
