Annotation of kupu/python/nationalizer.py, revision 1.1
1.1 ! dwinter 1: #!/usr/bin/python2.3
! 2:
! 3: """Return the Kupu .html file with i18n applied"""
! 4:
! 5: from xml.dom.minidom import parseString
! 6: import os
! 7:
# Parser-state markers, apparently intended for the .po reader
# (ID = "currently reading a msgid").
# NOTE(review): STR is presumably the matching "reading a msgstr" state, but
# nothing in the visible source ever reads it -- confirm before relying on it.
! 8: ID = 0
! 9: STR = 1
! 10:
# XML namespace URI under which the i18n 'translate' and 'attributes'
# directives are looked up (hasAttributeNS / getAttributeNS / removeAttributeNS).
! 11: I18NNS = 'http://xml.zope.org/namespaces/i18n'
! 12:
# Resolve the text/bytes type pair once so ustr() works on Python 2 and 3.
try:
    _TEXT_TYPE = unicode  # Python 2
    _BYTES_TYPE = str
except NameError:
    _TEXT_TYPE = str  # Python 3
    _BYTES_TYPE = bytes


def ustr(i):
    """Return *i* as a text (unicode) string.

    - Text strings, including subclasses, are returned unchanged.
    - Byte strings are decoded as UTF-8.
    - Any other object is converted with ``str()`` first; on Python 2 the
      resulting byte string is then decoded as UTF-8.

    Raises:
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # isinstance() rather than an exact type comparison: a unicode subclass
    # must not be pushed through str(), which can fail on non-ASCII text.
    if isinstance(i, _TEXT_TYPE):
        return i
    if not isinstance(i, _BYTES_TYPE):
        i = str(i)
        if isinstance(i, _TEXT_TYPE):
            # Python 3: str() already produced text.
            return i
    return i.decode('UTF-8')
! 18:
def get_locale():
    """Return the client's preferred languages from HTTP_ACCEPT_LANGUAGE.

    The header is read from ``os.environ`` (CGI style). Every language range
    in the header is considered; the result is ordered by descending quality
    value (``;q=``), with header order breaking ties. Ranges with ``q=0``
    (explicitly not acceptable) and empty entries are dropped. A missing or
    malformed quality value counts as 1.0.

    Returns:
        A list of language tags such as ``['en-US', 'en', 'nl']``, or
        ``None`` when the environment variable is not set.
    """
    header = os.environ.get('HTTP_ACCEPT_LANGUAGE')
    if header is None:
        return None
    ranked = []
    for position, item in enumerate(header.split(',')):
        parts = [part.strip() for part in item.split(';')]
        tag = parts[0]
        if not tag:
            continue
        quality = 1.0
        for param in parts[1:]:
            if param.lower().startswith('q='):
                try:
                    quality = float(param[2:])
                except ValueError:
                    quality = 1.0
                # Out-of-range values (including NaN) are treated as absent.
                if not (0.0 <= quality <= 1.0):
                    quality = 1.0
        if quality <= 0.0:
            continue
        # Negated quality sorts best-first; position keeps header order
        # stable among equally weighted entries.
        ranked.append((-quality, position, tag))
    ranked.sort()
    return [entry[2] for entry in ranked]
! 24:
class Nationalizer:
    """Translate strings in an HTML or XML file using i18n: directives.

    The file is parsed with minidom, ``i18n:translate`` and
    ``i18n:attributes`` directives are applied against a message catalog read
    from a ``.po`` file, and the resulting tree is serialized back to markup.
    """

    # Elements that serialize() always writes with an explicit end tag, even
    # when they have no children (everything else is written as <x />).
    not_single = ['a', 'abbr', 'acronym', 'address', 'applet',
                  'b', 'bdo', 'big', 'blink', 'blockquote',
                  'button', 'caption', 'center', 'cite',
                  'comment', 'del', 'dfn', 'dir', 'div',
                  'dl', 'dt', 'em', 'embed', 'fieldset',
                  'font', 'form', 'frameset', 'h1', 'h2',
                  'h3', 'h4', 'h5', 'h6', 'i', 'iframe',
                  'ins', 'kbd', 'label', 'legend', 'li',
                  'listing', 'map', 'marquee', 'menu',
                  'multicol', 'nobr', 'noembed', 'noframes',
                  'noscript', 'object', 'ol', 'optgroup',
                  'option', 'p', 'pre', 'q', 's', 'script',
                  'select', 'small', 'span', 'strike',
                  'strong', 'style', 'sub', 'sup', 'table',
                  'tbody', 'td', 'textarea', 'tfoot',
                  'th', 'thead', 'title', 'tr', 'tt', 'u',
                  'ul', 'xmp']

    def __init__(self, htmlfile, locale):
        """Remember the source file and the preferred languages.

        Args:
            htmlfile: path of the (X)HTML/XML file to translate.
            locale: sequence of language tags in order of preference
                (e.g. ``['nl-BE', 'nl']``); ``None`` means "no preference".
        """
        self.htmlfile = htmlfile
        self.locale = locale

    def translate(self):
        """Load the catalog and the document, translate, return the markup.

        When no ``.po`` file matches the locale the document is still parsed
        and serialized with an empty catalog, so that the i18n: attributes are
        stripped from the output.

        Returns:
            The serialized document element as a text string.
        """
        msgcat = {}
        popath = self.get_po_file_path(self.locale)
        if popath is not None:
            # Binary mode: parse_po_file() decodes as UTF-8 itself instead of
            # depending on the platform's default encoding.
            pofp = open(popath, 'rb')
            try:
                msgcat = self.parse_po_file(pofp)
            finally:
                pofp.close()
        # Binary mode lets the XML parser honour the document's own encoding
        # declaration.
        xmlfp = open(self.htmlfile, 'rb')
        try:
            xml = xmlfp.read()
        finally:
            xmlfp.close()
        dom = parseString(xml)
        self.apply_i18n(dom, msgcat)
        return self.serialize(dom.documentElement)

    def parse_po_file(self, pofp):
        """Parse a .po file into a mapping msgid -> msgstr.

        Args:
            pofp: iterable of lines (an open file); lines may be byte
                strings, which are decoded as UTF-8, or text strings.

        Returns:
            A dict of text strings. Entries with an empty msgid (the header)
            or an empty msgstr (untranslated) are left out.

        Comment lines and blank lines are skipped. Quoted continuation lines
        are appended to the preceding msgid/msgstr, and the usual backslash
        escapes are decoded. Other keywords (msgctxt, msgid_plural,
        msgstr[n], ...) are ignored, so plural entries produce no mapping.
        """
        cat = {}
        msgid = None
        msgstr = None
        target = None  # keyword whose value continuation lines extend
        for line in pofp:
            if not isinstance(line, type(u'')):
                line = line.decode('UTF-8')
            line = line.strip()
            if not line or line.startswith(u'#'):
                continue
            if line.startswith(u'"'):
                # continuation of a multi-line msgid/msgstr
                chunk = self._po_unquote(line)
                if target == u'msgid':
                    msgid += chunk
                elif target == u'msgstr':
                    msgstr += chunk
                continue
            parts = line.split(None, 1)
            keyword = parts[0]
            value = u''
            if len(parts) > 1:
                value = self._po_unquote(parts[1])
            if keyword == u'msgid':
                if msgid and msgstr:
                    cat[msgid] = msgstr
                msgid = value
                # reset so a stale translation never leaks into this entry
                msgstr = None
                target = keyword
            elif keyword == u'msgstr':
                msgstr = value
                target = keyword
            else:
                # unsupported keyword: make sure its continuation lines are
                # not glued onto the previous msgid/msgstr
                target = None
        if msgid and msgstr:
            cat[msgid] = msgstr
        return cat

    def _po_unquote(self, token):
        """Strip the surrounding double quotes of a .po string and unescape it."""
        token = token.strip()
        if (len(token) >= 2 and token.startswith(u'"')
                and token.endswith(u'"')):
            token = token[1:-1]
        escapes = {u'n': u'\n', u't': u'\t', u'r': u'\r',
                   u'"': u'"', u'\\': u'\\'}
        out = []
        index = 0
        length = len(token)
        while index < length:
            char = token[index]
            if char == u'\\' and index + 1 < length:
                following = token[index + 1]
                # unknown escapes are kept verbatim
                out.append(escapes.get(following, u'\\' + following))
                index += 2
            else:
                out.append(char)
                index += 1
        return u''.join(out)

    def apply_i18n(self, dom, msgcat):
        """Apply the i18n: directives of the full document.

        Args:
            dom: a parsed minidom document.
            msgcat: mapping msgid -> msgstr.
        """
        root = dom.documentElement
        # getElementsByTagName() only yields descendants; include the
        # document element itself so directives on it are handled as well.
        nodes = [root] + list(root.getElementsByTagName('*'))
        for node in nodes:
            if node.hasAttributeNS(I18NNS, 'translate'):
                self.apply_translate(node, msgcat)
            if node.hasAttributeNS(I18NNS, 'attributes'):
                self.apply_attributes(node, msgcat)

    def apply_translate(self, node, msgcat):
        """Handle Zope-style i18n:translate on *node*.

        With an explicit message id in the attribute, the element's content
        is replaced by the translation if there is one; otherwise the
        existing content is kept as the default text. With an empty
        attribute, the whitespace-normalized text content is the message id
        and the content is replaced by its translation (or by the normalized
        text itself when untranslated). The i18n:translate attribute is
        removed in all cases.

        Raises:
            TypeError: if the attribute is empty and the element contains
                anything other than text nodes.
        """
        msgid = node.getAttributeNS(I18NNS, 'translate').strip()
        if msgid:
            if msgid not in msgcat:
                # no translation: keep the default content instead of
                # overwriting it with the bare message id
                node.removeAttributeNS(I18NNS, 'translate')
                return
            msgstr = msgcat[msgid]
        else:
            # no msgid in the attribute, use the node value
            buf = []
            for child in node.childNodes:
                if child.nodeType != child.TEXT_NODE:
                    raise TypeError(
                        'illegal element %s in i18n:translate element' %
                        child.nodeName)
                buf.append(child.nodeValue)
            msgid = self.reduce_whitespace(u''.join(buf).strip())
            msgstr = msgid
            if msgid in msgcat:
                msgstr = msgcat[msgid]
        # now replace the contents of the node with the new contents
        while node.hasChildNodes():
            node.removeChild(node.firstChild)
        node.removeAttributeNS(I18NNS, 'translate')
        # ustr(): catalogs supplied by callers may still hold byte strings
        node.appendChild(node.ownerDocument.createTextNode(ustr(msgstr)))

    def apply_attributes(self, node, msgcat):
        """Handle Zope-style i18n:attributes on *node*.

        The directive's value is a whitespace-separated list of attribute
        names; each named attribute whose current value is a known msgid is
        replaced by its translation. The directive itself is removed.
        """
        attrnames = node.getAttributeNS(I18NNS, 'attributes').split()
        for attr in attrnames:
            value = node.getAttribute(attr)
            if value and value in msgcat:
                # ustr() accepts both text and UTF-8 byte strings, matching
                # what apply_translate() does
                node.setAttribute(attr, ustr(msgcat[value]))
        node.removeAttributeNS(I18NNS, 'attributes')

    def reduce_whitespace(self, string):
        """Turn newlines, tabs and carriage returns into spaces and collapse
        every run of spaces into a single space (no stripping)."""
        for char in ['\n', '\t', '\r']:
            string = string.replace(char, ' ')
        # Built with a multiplication so the two-space needle survives any
        # tool that collapses whitespace in the source text.
        double_space = ' ' * 2
        while double_space in string:
            string = string.replace(double_space, ' ')
        return string

    def get_po_file_path(self, locale):
        """Return the path of the first existing .po file for *locale*.

        For each language tag, in order, the candidates are
        ``../i18n/kupu-<lang>-<region>.po`` (only for two-part tags),
        ``../i18n/kupu-<lang>-default.po`` and ``../i18n/kupu-<lang>.po``.
        Paths are relative to the current working directory.

        Args:
            locale: sequence of language tags, or ``None``.

        Returns:
            The path as a string, or ``None`` when nothing matches.
        """
        startdir = '../i18n'
        for language in locale or []:
            parts = language.split('-')
            # The tags usually come straight from an HTTP header: refuse
            # anything that could escape the i18n directory ('/', '..', ...).
            unsafe = [part for part in parts
                      if not part.replace('_', '').isalnum()]
            if unsafe:
                continue
            pathstart = '%s/kupu-%s' % (startdir, parts[0])
            paths = []
            if len(parts) == 2:
                paths.append('%s-%s.po' % (pathstart, parts[1]))
            paths += [
                '%s-default.po' % pathstart,
                '%s.po' % pathstart,
            ]
            for path in paths:
                if os.path.isfile(path):
                    return path
        return None

    def serialize(self, el):
        """Serialize DOM node *el* (recursively) to a text string.

        Elements are written with their attributes (values escaped through
        entitize()); empty elements not listed in ``not_single`` are written
        as ``<name />``. Text nodes are written as-is. Any other node type
        (comments, CDATA sections, ...) is reported on stdout and skipped.
        """
        buf = []
        if el.nodeType == el.ELEMENT_NODE:
            buf.append(u'<%s' % el.nodeName)
            for attr, value in el.attributes.items():
                if value is not None:
                    buf.append(u' %s="%s"' % (attr, self.entitize(value)))
            if el.hasChildNodes() or el.nodeName in self.not_single:
                buf.append(u'>')
                for child in el.childNodes:
                    # append, not +=: += would extend buf one character
                    # at a time
                    buf.append(self.serialize(child))
                buf.append(u'</%s>' % el.nodeName)
            else:
                buf.append(u' />')
        elif el.nodeType == el.TEXT_NODE:
            # NOTE(review): text is emitted without entity escaping, so '&'
            # and '<' in text nodes come out raw -- confirm this is intended
            # (e.g. for inline script content) before changing it.
            buf.append(el.nodeValue)
        else:
            print('ignoring node of type %s' % el.nodeType)
        return u''.join([ustr(b) for b in buf])

    def entitize(self, string):
        """Escape &, <, > and the double quote for use in an attribute value."""
        # '&' must be replaced first so the entities added below are not
        # escaped a second time
        string = string.replace('&', '&amp;')
        string = string.replace('<', '&lt;')
        string = string.replace('>', '&gt;')
        string = string.replace('"', '&quot;')
        return string
! 189:
if __name__ == '__main__':
    # test code: translate the bundled kupu.html to Dutch and dump the
    # result on stdout as UTF-8
    import sys
    # the .po and .html paths are relative to this script's directory
    os.chdir(os.path.abspath(os.path.dirname(__file__)))
    nationalizer = Nationalizer('../common/kupu.html', ['nl'])
    output = (nationalizer.translate() + u'\n').encode('UTF-8')
    # Python 3 needs the underlying binary stream to write encoded bytes;
    # Python 2's sys.stdout accepts them directly.
    getattr(sys.stdout, 'buffer', sys.stdout).write(output)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>