Annotation of kupu/python/nationalizer.py, revision 1.1.1.1
1.1 dwinter 1: #!/usr/bin/python2.3
2:
3: """Return the Kupu .html file with i18n applied"""
4:
5: from xml.dom.minidom import parseString
6: import os
7:
# Symbolic parser states for reading .po files: ID marks "a msgid line was
# read last"; STR is presumably the msgstr counterpart (TODO confirm, it is
# not referenced anywhere in the visible code).
ID = 0
STR = 1

# XML namespace URI of the Zope-style i18n: attributes (i18n:translate,
# i18n:attributes) that Nationalizer looks up with the *NS DOM methods.
I18NNS = 'http://xml.zope.org/namespaces/i18n'
12:
def ustr(i):
    """Return ``i`` as a unicode string.

    Unicode objects (including instances of unicode subclasses) are
    returned unchanged.  Anything else is converted with ``str()`` and the
    resulting byte string is decoded as UTF-8.

    Raises UnicodeDecodeError if the byte string is not valid UTF-8.
    """
    # isinstance() rather than an exact type comparison: a unicode subclass
    # must not take the str() round trip below, which would fail on
    # non-ASCII text
    if isinstance(i, unicode):
        return i
    return unicode(str(i), 'UTF-8')
18:
def get_locale():
    """Return the language tags of the HTTP Accept-Language header.

    The header is read from the ``HTTP_ACCEPT_LANGUAGE`` environment
    variable.  The result is a list of language tags (e.g.
    ``['en-US', 'en', 'nl']``) in the order in which they appear in the
    header, with parameters such as ``;q=0.8`` removed.

    Returns None when the environment variable is not set.
    """
    header = os.environ.get('HTTP_ACCEPT_LANGUAGE')
    if header is None:
        return None
    languages = []
    # entries are separated by commas, each entry may carry its own
    # ';q=...' parameter; so split on ',' first and only then cut off the
    # parameters (splitting on ';' first loses every language that follows
    # the first quality value)
    for entry in header.split(','):
        tag = entry.split(';')[0].strip()
        if tag:
            languages.append(tag)
    return languages
24:
class Nationalizer:
    """Translates strings in an HTML or XML file using i18n: directives

    The file is parsed as XML, elements carrying Zope-style
    ``i18n:translate`` or ``i18n:attributes`` attributes are translated
    using a message catalog read from a .po file, the i18n: attributes are
    removed and the document element is serialized back to markup.
    """

    # elements that are always written with an explicit end tag, even when
    # they have no children (see serialize())
    not_single = ['a', 'abbr', 'acronym', 'address', 'applet',
                  'b', 'bdo', 'big', 'blink', 'blockquote',
                  'button', 'caption', 'center', 'cite',
                  'comment', 'del', 'dfn', 'dir', 'div',
                  'dl', 'dt', 'em', 'embed', 'fieldset',
                  'font', 'form', 'frameset', 'h1', 'h2',
                  'h3', 'h4', 'h5', 'h6', 'i', 'iframe',
                  'ins', 'kbd', 'label', 'legend', 'li',
                  'listing', 'map', 'marquee', 'menu',
                  'multicol', 'nobr', 'noembed', 'noframes',
                  'noscript', 'object', 'ol', 'optgroup',
                  'option', 'p', 'pre', 'q', 's', 'script',
                  'select', 'small', 'span', 'strike',
                  'strong', 'style', 'sub', 'sup', 'table',
                  'tbody', 'td', 'textarea', 'tfoot',
                  'th', 'thead', 'title', 'tr', 'tt', 'u',
                  'ul', 'xmp']

    def __init__(self, htmlfile, locale):
        """Store the path of the file to translate and the locale list

        htmlfile -- path of the (well-formed XML) file to translate
        locale -- sequence of language tags such as ['nl-BE', 'en'], in
                  order of preference; may be None or empty
        """
        self.htmlfile = htmlfile
        self.locale = locale

    def translate(self):
        """load and translate everything

        Returns the serialized document element as a string.  Only the
        document element is serialized, anything outside of it (such as a
        doctype declaration) is not part of the result.
        """
        popath = self.get_po_file_path(self.locale)
        if popath is not None:
            pofp = open(popath)
            try:
                msgcat = self.parse_po_file(pofp)
            finally:
                pofp.close()
        else:
            # if no pofile, parse anyway to get rid of those nasty i18n:
            # attributes (obviously not very fast, perhaps we need to either
            # cache a parsed version and send that back or just remove the
            # attributes here)
            msgcat = {}
        xmlfp = open(self.htmlfile)
        try:
            xml = xmlfp.read()
        finally:
            xmlfp.close()
        dom = parseString(xml)
        self.apply_i18n(dom, msgcat)
        return self.serialize(dom.documentElement)

    def parse_po_file(self, pofp):
        """parse the .po file, create a mapping msgid->msgstr

        pofp -- open file-like object providing readlines()

        Comment lines and blank lines are skipped.  Strings that are
        continued on following lines are concatenated and backslash
        escapes are resolved.  Entries with an empty msgid (the catalog
        header) or an empty msgstr (untranslated) are left out, as are
        plural entries (msgid_plural / msgstr[n]), which i18n:translate
        can not use.
        """
        cat = {}
        msgid = None
        msgstr = None
        target = None   # what a continuation line extends: 'id' or 'str'
        for line in pofp.readlines():
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if line.startswith('"'):
                # continuation of the string started on an earlier line
                text = self._po_string(line)
                if target == 'id':
                    msgid += text
                elif target == 'str':
                    msgstr += text
                continue
            keyword = line.split(None, 1)[0]
            if keyword == 'msgid':
                # a new entry starts, so the previous one is complete
                if msgid and msgstr:
                    cat[msgid] = msgstr
                msgid = self._po_string(line)
                # forget the old translation so it can not get attached
                # to the new msgid
                msgstr = None
                target = 'id'
            elif keyword == 'msgstr':
                msgstr = self._po_string(line)
                target = 'str'
            else:
                # msgctxt, msgid_plural, msgstr[n], ...: not supported,
                # make sure their continuation lines are ignored as well
                target = None
        if msgid and msgstr:
            cat[msgid] = msgstr
        return cat

    def _po_string(self, line):
        """return the unescaped text between the outermost double quotes"""
        start = line.find('"')
        end = line.rfind('"')
        if start < 0 or end <= start:
            return ''
        return self._po_unescape(line[start + 1:end])

    def _po_unescape(self, text):
        """resolve the backslash escapes of a .po string"""
        if '\\' not in text:
            return text
        escapes = {'n': '\n', 't': '\t', 'r': '\r', '"': '"', '\\': '\\'}
        out = []
        i = 0
        while i < len(text):
            char = text[i]
            if char == '\\' and i + 1 < len(text):
                following = text[i + 1]
                # unknown escapes are passed through untouched
                out.append(escapes.get(following, '\\' + following))
                i += 2
            else:
                out.append(char)
                i += 1
        return ''.join(out)

    def apply_i18n(self, dom, msgcat):
        """apply nationalization of the full dom

        Handles every element below the document element that carries an
        i18n:translate and/or i18n:attributes attribute.
        """
        nodes = dom.documentElement.getElementsByTagName('*')
        for node in nodes:
            if node.hasAttributeNS(I18NNS, 'translate'):
                self.apply_translate(node, msgcat)
            if node.hasAttributeNS(I18NNS, 'attributes'):
                self.apply_attributes(node, msgcat)

    def apply_translate(self, node, msgcat):
        """handle Zope-style i18n:translate

        If the attribute holds a message id that id is looked up,
        otherwise the whitespace-normalized text content of the node is
        used as the message id.  The i18n:translate attribute is removed
        in all cases.

        Raises TypeError if the message id has to be taken from the
        content and the node has children that are not text nodes.
        """
        msgid = node.getAttributeNS(I18NNS, 'translate').strip()
        explicit = bool(msgid)
        if not explicit:
            # no msgid in the attribute, use the node value
            buf = []
            for child in node.childNodes:
                if child.nodeType != 3:
                    raise TypeError(
                        'illegal element %s in i18n:translate element' %
                        child.nodeName)
                buf.append(child.nodeValue)
            msgid = self.reduce_whitespace(u''.join(buf).strip())
        node.removeAttributeNS(I18NNS, 'translate')
        if msgid in msgcat:
            msgstr = msgcat[msgid]
        elif explicit:
            # an explicit message id without translation: the content of
            # the element is the default text, so leave it alone instead
            # of showing the raw message id
            return
        else:
            msgstr = msgid
        # now replace the contents of the node with the new contents
        while node.hasChildNodes():
            node.removeChild(node.firstChild)
        node.appendChild(node.ownerDocument.createTextNode(ustr(msgstr)))

    def apply_attributes(self, node, msgcat):
        """handle Zope-style i18n:attributes

        The attribute holds a whitespace-separated list of attribute
        names; the value of each of those attributes is used as message
        id and replaced if the catalog has a translation for it.
        """
        # split() without argument, so repeated spaces do not result in
        # empty attribute names
        attrnames = node.getAttributeNS(I18NNS, 'attributes').split()
        for attr in attrnames:
            value = node.getAttribute(attr)
            if value and value in msgcat:
                node.setAttribute(attr, ustr(msgcat[value]))
        node.removeAttributeNS(I18NNS, 'attributes')

    def reduce_whitespace(self, string):
        """replace newlines, tabs and carriage returns with spaces and
        collapse every run of spaces into a single space"""
        for char in ['\n', '\t', '\r']:
            string = string.replace(char, ' ')
        # search for *two* spaces: searching for a single one would never
        # terminate as soon as the string contains a space
        while string.find('  ') > -1:
            string = string.replace('  ', ' ')
        return string

    def get_po_file_path(self, locale, startdir='../i18n'):
        """return the path of the first existing .po file for locale

        locale -- sequence of language tags in order of preference, may
                  be None or empty
        startdir -- directory holding the kupu-*.po files, by default
                    '../i18n' relative to the current working directory

        For a tag 'xx-YY' the files kupu-xx-YY.po, kupu-xx-default.po and
        kupu-xx.po are tried in that order.  Returns None if no file is
        found for any of the languages.
        """
        if not locale:
            return None
        for language in locale:
            # the tags can originate from an HTTP header, do not let them
            # point outside of startdir
            if '/' in language or '\\' in language:
                continue
            subtags = language.split('-')
            pathstart = '%s/kupu-%s' % (startdir, subtags[0])
            paths = []
            if len(subtags) == 2:
                paths.append('%s-%s.po' % (pathstart, subtags[1]))
            paths += [
                '%s-default.po' % pathstart,
                '%s.po' % pathstart,
            ]
            for path in paths:
                if os.path.isfile(path):
                    return path
        return None

    def serialize(self, el):
        """return the markup for el and its descendants as a string

        Elements and text nodes are written, nodes of any other type
        (comments, CDATA sections, ...) are left out and reported on
        stderr.  Empty elements are written as '<name />' unless their
        name is listed in not_single.
        """
        buf = []
        if el.nodeType == 1:
            buf.append('<%s' % el.nodeName)
            for attr, value in el.attributes.items():
                if value is not None:
                    buf.append(' %s="%s"' % (attr, self.entitize(value)))
            if el.hasChildNodes() or el.nodeName in self.not_single:
                buf.append('>')
                for child in el.childNodes:
                    # append, not +=: extending the list with a string
                    # adds it one character at a time
                    buf.append(self.serialize(child))
                buf.append('</%s>' % el.nodeName)
            else:
                buf.append(' />')
        elif el.nodeType == 3:
            # NOTE(review): text is written without escaping, so '&' and
            # '<' that were entities in the source end up raw in the
            # output; left as is because translations may rely on it
            buf.append(el.nodeValue)
        else:
            # report on stderr so the message does not end up in the
            # middle of markup that is written to stdout
            import sys
            sys.stderr.write('ignoring node of type %s\n' % el.nodeType)
        return ''.join([ustr(b) for b in buf])

    def entitize(self, string):
        """escape string for use in a double-quoted attribute value"""
        # the ampersand has to go first, else the ampersands of the other
        # entities would get escaped again
        string = string.replace('&', '&amp;')
        string = string.replace('<', '&lt;')
        string = string.replace('>', '&gt;')
        string = string.replace('"', '&quot;')
        return string
189:
if __name__ == '__main__':
    # test code: run from the directory of this script, so the relative
    # paths resolve, and write the Dutch version of kupu.html to stdout
    script_dir = os.path.dirname(__file__)
    os.chdir(os.path.abspath(script_dir))
    nationalizer = Nationalizer('../common/kupu.html', ['nl'])
    translated = nationalizer.translate()
    print(translated.encode('UTF-8'))
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>