kupu/python/nationalizer.py - view

File: [Repository] / kupu / python / nationalizer.py
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Thu Sep 15 13:06:00 2005 UTC (18 years, 9 months ago) by dwinter
Branches: first, MAIN
CVS tags: dwinter, HEAD

modifizierter kupu fuer webpages des instituts

#!/usr/bin/python2.3

"""Return the Kupu .html file with i18n applied"""

from xml.dom import Node
from xml.dom.minidom import parseString
import os
import sys

ID = 0
STR = 1

I18NNS = 'http://xml.zope.org/namespaces/i18n'

# The text type: ``unicode`` on Python 2, ``str`` on Python 3.
try:
    _UNICODE = unicode
except NameError:
    _UNICODE = str


def ustr(i):
    """Return *i* as a unicode string.

    Unicode strings are returned unchanged, byte strings are decoded as
    UTF-8 and any other object is converted with str() first.
    """
    if isinstance(i, _UNICODE):
        return i
    if not hasattr(i, 'decode'):
        i = str(i)
        if isinstance(i, _UNICODE):
            # Python 3: str() already produced text
            return i
    return i.decode('UTF-8')


def get_locale():
    """Return the languages listed in the HTTP_ACCEPT_LANGUAGE variable.

    Returns a list of language tags (e.g. ['en-US', 'en', 'nl']) in header
    order with any ';q=...' parameters stripped, or None if the
    environment variable is not set.
    """
    header = os.environ.get('HTTP_ACCEPT_LANGUAGE')
    if header is None:
        return None
    languages = []
    for entry in header.split(','):
        # each entry looks like 'nl' or 'en;q=0.8', only the tag is wanted
        language = entry.split(';')[0].strip()
        if language:
            languages.append(language)
    return languages


def _unquote_po(line):
    """Return the text between the first and the last '"' of a .po line"""
    start = line.find('"')
    end = line.rfind('"')
    if start == -1 or end <= start:
        return ''
    return line[start + 1:end]


class Nationalizer:
    """Translates string in an HTML or XML file using i18n: directives"""

    # elements that must always be written with an explicit end tag, even
    # when they are empty
    not_single = ['a', 'abbr', 'acronym', 'address', 'applet',
                  'b', 'bdo', 'big', 'blink', 'blockquote',
                  'button', 'caption', 'center', 'cite',
                  'comment', 'del', 'dfn', 'dir', 'div',
                  'dl', 'dt', 'em', 'embed', 'fieldset',
                  'font', 'form', 'frameset', 'h1', 'h2',
                  'h3', 'h4', 'h5', 'h6', 'i', 'iframe',
                  'ins', 'kbd', 'label', 'legend', 'li',
                  'listing', 'map', 'marquee', 'menu',
                  'multicol', 'nobr', 'noembed', 'noframes',
                  'noscript', 'object', 'ol', 'optgroup',
                  'option', 'p', 'pre', 'q', 's', 'script',
                  'select', 'small', 'span', 'strike',
                  'strong', 'style', 'sub', 'sup', 'table',
                  'tbody', 'td', 'textarea', 'tfoot',
                  'th', 'thead', 'title', 'tr', 'tt', 'u',
                  'ul', 'xmp']

    def __init__(self, htmlfile, locale):
        """Remember the path of the file to translate and the locale list"""
        self.htmlfile = htmlfile
        self.locale = locale

    def translate(self):
        """load and translate everything

        Returns the serialized document element as a unicode string.
        """
        popath = self.get_po_file_path(self.locale)
        if popath is not None:
            pofp = open(popath)
            try:
                msgcat = self.parse_po_file(pofp)
            finally:
                pofp.close()
        else:
            # if no pofile, parse anyway to get rid of those nasty i18n:
            # attributes (obviously not very fast, perhaps we need to either
            # cache a parsed version and send that back or just remove the
            # attributes here)
            msgcat = {}
        # read raw bytes so the XML parser can honour the declared encoding
        xmlfp = open(self.htmlfile, 'rb')
        try:
            xml = xmlfp.read()
        finally:
            xmlfp.close()
        dom = parseString(xml)
        self.apply_i18n(dom, msgcat)
        return self.serialize(dom.documentElement)

    def parse_po_file(self, pofp):
        """parse the .po file, create a mapping msgid->msgstr

        pofp is an iterable of lines (e.g. an open file). Comment lines and
        blank lines are skipped, quoted continuation lines are appended to
        the msgid or msgstr they follow. Entries with an empty msgid (the
        .po header) or an empty msgstr (untranslated) are left out. Escape
        sequences inside the strings are not interpreted.
        """
        cat = {}
        state = None
        msgid = None
        msgstr = None
        for line in pofp:
            line = line.strip()
            if line.startswith('#') or not line:
                continue
            keyword = line.split(None, 1)[0]
            if keyword == 'msgid':
                # a new entry starts, store the one that just ended
                if msgid and msgstr:
                    cat[msgid] = msgstr
                msgid = _unquote_po(line)
                # don't let the previous translation leak into this entry
                msgstr = None
                state = ID
            elif keyword == 'msgstr':
                msgstr = _unquote_po(line)
                state = STR
            elif line.startswith('"'):
                # continuation of a multiline msgid or msgstr
                if state == ID:
                    msgid += _unquote_po(line)
                elif state == STR:
                    msgstr += _unquote_po(line)
            else:
                # unsupported keyword (msgctxt, msgid_plural, msgstr[n]),
                # make sure its continuation lines are ignored as well
                state = None
        if msgid and msgstr:
            cat[msgid] = msgstr
        return cat

    def apply_i18n(self, dom, msgcat):
        """apply nationalization of the full dom"""
        nodes = dom.documentElement.getElementsByTagName('*')
        for node in nodes:
            if node.hasAttributeNS(I18NNS, 'translate'):
                self.apply_translate(node, msgcat)
            if node.hasAttributeNS(I18NNS, 'attributes'):
                self.apply_attributes(node, msgcat)

    def apply_translate(self, node, msgcat):
        """handle Zope-style i18n:translate

        The msgid is the attribute value or, if that is empty, the
        whitespace-reduced text content of the node. The node's children
        are replaced by a single text node holding the translation (or the
        msgid itself if msgcat has no entry) and the attribute is removed.

        Raises TypeError if the msgid has to be taken from the content and
        the node has a child that is not a text node.
        """
        msgid = node.getAttributeNS(I18NNS, 'translate').strip()
        if not msgid:
            # no msgid in the attribute, use the node value
            buf = []
            for child in node.childNodes:
                if child.nodeType != Node.TEXT_NODE:
                    raise TypeError(
                        'illegal element %s in i18n:translate element' %
                        child.nodeName)
                buf.append(child.nodeValue)
            msgid = self.reduce_whitespace(u''.join(buf).strip())
        # NOTE(review): for an explicit msgid without a translation this
        # replaces the content with the msgid rather than keeping the
        # default content - confirm that is intended
        msgstr = msgid
        if msgid in msgcat:
            msgstr = msgcat[msgid]
        # now replace the contents of the node with the new contents
        while node.hasChildNodes():
            node.removeChild(node.firstChild)
        node.removeAttributeNS(I18NNS, 'translate')
        node.appendChild(node.ownerDocument.createTextNode(msgstr))

    def apply_attributes(self, node, msgcat):
        """handle Zope-style i18n:attributes

        Every attribute named in the (whitespace separated) i18n:attributes
        value is replaced by its translation if msgcat has one, then the
        i18n:attributes attribute itself is removed.
        """
        attrnames = node.getAttributeNS(I18NNS, 'attributes').split()
        for attr in attrnames:
            value = node.getAttribute(attr)
            if value and value in msgcat:
                node.setAttribute(attr, ustr(msgcat[value]))
        node.removeAttributeNS(I18NNS, 'attributes')

    def reduce_whitespace(self, string):
        """Turn newlines and tabs into spaces and collapse runs of spaces"""
        for char in ['\n', '\t', '\r']:
            string = string.replace(char, ' ')
        while string.find('  ') > -1:
            string = string.replace('  ', ' ')
        return string

    def get_po_file_path(self, locale):
        """Return the path of the first existing .po file for locale

        locale is a list of language tags such as 'nl' or 'nl-BE' (None is
        treated as an empty list). For each tag the files
        kupu-<lang>-<country>.po (only if a country is given),
        kupu-<lang>-default.po and kupu-<lang>.po are tried in ../i18n,
        relative to the current working directory. Returns None if none of
        them exists.
        """
        startdir = '../i18n'
        for language in locale or []:
            language = language.split('-')
            pathstart = '%s/kupu-%s' % (startdir, language[0])
            paths = []
            if len(language) == 2:
                paths.append('%s-%s.po' % (pathstart, language[1]))
            paths += [
                '%s-default.po' % pathstart,
                '%s.po' % pathstart,
            ]
            for path in paths:
                if os.path.isfile(path):
                    return path
        return None

    def serialize(self, el):
        """Return el and its subtree as a unicode string of markup

        Only element and text nodes are written, other node types are
        reported on stderr and skipped. Attribute values are entitized,
        text nodes are written as they are. Empty elements are written as
        '<name />' unless their name is in not_single.
        """
        buf = []
        if el.nodeType == Node.ELEMENT_NODE:
            buf.append('<%s' % el.nodeName)
            for attr, value in el.attributes.items():
                if value is not None:
                    buf.append(' %s="%s"' % (attr, self.entitize(value)))
            if el.hasChildNodes() or el.nodeName in self.not_single:
                buf.append('>')
                for child in el.childNodes:
                    buf.append(self.serialize(child))
                buf.append('</%s>' % el.nodeName)
            else:
                buf.append(' />')
        elif el.nodeType == Node.TEXT_NODE:
            buf.append(el.nodeValue)
        else:
            # stderr, so the message can't end up in the generated page
            sys.stderr.write('ignoring node of type %s\n' % el.nodeType)
        return u''.join([ustr(b) for b in buf])

    def entitize(self, string):
        """Escape &, <, > and " so string can be used as attribute value"""
        # the ampersand has to go first or it would escape the entities
        # that are added by the other replacements
        string = string.replace('&', '&amp;')
        string = string.replace('<', '&lt;')
        string = string.replace('>', '&gt;')
        string = string.replace('"', '&quot;')
        return string


if __name__ == '__main__':
    # test code
    os.chdir(os.path.abspath(os.path.dirname(__file__)))
    nationalizer = Nationalizer('../common/kupu.html', ['nl'])
    output = nationalizer.translate().encode('UTF-8')
    # write bytes: Python 3 needs the underlying binary stream for that
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(output + '\n'.encode('ascii'))