Annotation of kupuMPIWG/tools/makepox.py, revision 1.1.1.1
1.1 dwinter 1: """Simple script to generate .pox files
2:
3: parses XML for i18n attrs and JS files for _() calls and generates an
4: XML .pox template document (.poxt file)
5:
6: (c) Guido Wesdorp 2005
7:
8: """
9:
10: from xml.dom.minidom import parseString, getDOMImplementation
11: import sys, re, os
12:
# Stream that receives progress and warning messages.
stderr = sys.stderr

# When true, a file that cannot be parsed as XML is reported on stderr
# (the file is skipped either way).
warn_on_broken_xml = True
16:
class POX:
    """Container for the results.

    Message ids are collected in an XML document whose root element is
    ``catalog``.  Every distinct msgid gets one ``message`` element with a
    ``msgid`` and a ``msgstr`` child; the ``filenames`` attribute of the
    ``message`` element lists, space separated, the files it was found in.
    """

    def __init__(self):
        impl = getDOMImplementation()
        self.doc = impl.createDocument(None, 'catalog', None)
        self.root = self.doc.documentElement
        # mapping from msgid to ([filenames], message node)
        self.processed = {}

    def add(self, msgid, filename):
        """Record that ``msgid`` occurs in ``filename``.

        The msgid is stripped, newlines and tabs are turned into spaces and
        runs of spaces are collapsed to a single space before it is stored.
        A msgid that is already known only gets ``filename`` appended to
        its list of filenames (once per file).
        """
        # strip and reduce whitespace; str objects are immutable, so the
        # collapsed value has to be assigned back to have any effect
        msgid = msgid.strip().replace('\n', ' ').replace('\t', ' ')
        msgid = re.sub(' {2,}', ' ', msgid)

        if msgid in self.processed:
            filenames, node = self.processed[msgid]
            if filename not in filenames:
                filenames.append(filename)
                node.setAttribute(
                    'filenames',
                    '%s %s' % (node.getAttribute('filenames'), filename))
            return

        doc = self.doc
        # add the nodes
        msgnode = doc.createElement('message')
        msgnode.setAttribute('filenames', filename)
        msgidnode = doc.createElement('msgid')
        msgidnode.appendChild(doc.createTextNode(msgid))
        msgnode.appendChild(msgidnode)
        # the msgstr starts out as a copy of the msgid
        msgstrnode = doc.createElement('msgstr')
        msgstrnode.appendChild(doc.createTextNode(msgid))
        msgstrnode.setAttribute('i18n:translate', '')
        msgnode.appendChild(msgstrnode)
        self.root.appendChild(msgnode)
        self.processed[msgid] = ([filename], msgnode)

    def get_result(self):
        """Return the catalog document serialised as pretty-printed XML."""
        return self.doc.toprettyxml()
55:
class XMLParser:
    """Scans XML files (or well-formed HTML files) for i18n attributes.

    Elements carrying ``i18n:translate`` or ``i18n:attributes`` contribute
    message ids, which are handed to the ``add(msgid, filename)`` method of
    the ``pox`` object passed to the constructor.
    """

    def __init__(self, files, pox):
        # node most recently returned by next_node(), None before/after a walk
        self._current = None
        for file in files:
            self.parse_file(file, pox)

    def parse_file(self, filename, pox):
        """Parse ``filename`` and add every message id found to ``pox``.

        A file that cannot be read or parsed is skipped; when the module
        level ``warn_on_broken_xml`` flag is true the error is reported on
        the module level ``stderr`` stream.

        Raises AttributeError when ``i18n:attributes`` names an attribute
        that the element does not have.
        """
        # binary mode lets the XML parser apply the encoding declared in
        # the document instead of a locale dependent default
        fp = open(filename, 'rb')
        try:
            try:
                dom = parseString(fp.read())
            finally:
                fp.close()
        except Exception:
            exc, e = sys.exc_info()[:2]
            if warn_on_broken_xml:
                stderr.write(
                    'Error parsing %s: %s - %s\n' % (filename, exc, e))
            return
        # walk through all the nodes and scan for i18n: stuff
        while True:
            node = self.next_node(dom)
            if node is None:
                break
            if node.nodeType != node.ELEMENT_NODE:
                continue
            attrs = node.attributes
            translate = attrs.getNamedItem('i18n:translate')
            if translate is not None:
                msgid = translate.value
                if not msgid.strip():
                    # no explicit msgid: the element content is the msgid
                    msgid = self.extract_text(node)
                pox.add(msgid, filename)
            attributes = attrs.getNamedItem('i18n:attributes')
            if attributes is not None:
                for attr in attributes.value.split(';'):
                    attr = attr.strip()
                    if not attr:
                        # empty entry, e.g. produced by a trailing ';'
                        continue
                    attritem = attrs.getNamedItem(attr)
                    if attritem is None:
                        raise AttributeError(
                            'No %s on %s in %s' % (
                                attr, node.nodeName, filename))
                    pox.add(attritem.value, filename)

    def extract_text(self, node):
        """Return the serialised children of ``node`` as one string.

        Each child is serialised with toxml() and stripped, newlines and
        tabs become spaces, the pieces are concatenated and runs of spaces
        are collapsed to a single space.
        """
        pieces = [
            child.toxml().strip().replace('\n', ' ').replace('\t', ' ')
            for child in node.childNodes]
        return re.sub(' {2,}', ' ', ''.join(pieces))

    def next_node(self, dom):
        """Return the node following the current one in document order.

        The first call for a given ``dom`` returns its document element;
        None is returned once every node has been visited.
        """
        cur = self._current
        if cur is None or cur.ownerDocument is not dom:
            self._current = dom.documentElement
            return self._current
        if cur.hasChildNodes():
            following = cur.childNodes[0]
        else:
            # climb until some ancestor-or-self has a next sibling; looking
            # at the direct parent only would end the walk too early
            following = None
            while cur is not None:
                if cur.nextSibling is not None:
                    following = cur.nextSibling
                    break
                cur = cur.parentNode
        self._current = following
        return following
119:
class JSParser:
    """Scans JS files for _() calls.

    The string literal passed to each ``_()`` call is handed to the
    ``add(msgid, filename)`` method of the ``pox`` object passed to the
    constructor.  One call per line is recognised; a literal may continue
    on following lines when each line ends with a ``+``.
    """

    # '_(' preceded by a character that cannot be part of an identifier
    _startfuncreg = re.compile(r'.*?[^a-zA-Z0-9_]_\(')
    # '_(' at the very start of the line
    _startfuncreg_2 = re.compile(r'^_\(')

    def __init__(self, files, pox):
        for file in files:
            self.parse_file(file, pox)

    def parse_file(self, filename, pox):
        """Add the literal of every _() call in ``filename`` to ``pox``.

        Raises ValueError when a call has empty or unrecognised content,
        or when a multiline literal is still open at the end of the file.
        """
        fp = open(filename)
        try:
            lines = fp.readlines()
        finally:
            fp.close()
        lineno = 0
        more = False
        chunks = []
        for line in lines:
            lineno += 1
            if (more or self._startfuncreg.search(line)
                    or self._startfuncreg_2.search(line)):
                chunk, more = self._get_func_content(line, filename,
                                                     lineno, more)
                chunks.append(chunk)
            if chunks and not more:
                literal = ''.join(chunks).strip()
                if not literal:
                    raise ValueError('Unrecognized function content -- '
                                     'file %s, line %s' % (
                                         filename, lineno))
                literal = literal.replace('\t', ' ').replace('\n', ' ')
                literal = re.sub(' {2,}', ' ', literal)
                chunks = []
                pox.add(literal, filename)
        if more:
            # the last line ended in '+' but nothing followed it
            raise ValueError('multiline string not finished at end of '
                             'file -- file %s, line %s' % (
                                 filename, lineno))

    def _get_func_content(self, line, filename, lineno, more=False):
        """return the content of the _() call in line

        if more is True, this will assume the function is already opened
        and continue adding to the result from the start of the line
        without searching for '[^a-zA-Z_]_(' first

        returns a tuple (content, more) where more is True if the end of
        the function body is not reached, in that case this method should
        be called again with the 'more' argument set to True

        raises ValueError if the line holds no _() call, if the argument
        is not a complete quoted string, or if a '+' is followed by
        anything on the same line
        """
        line = line.strip()
        if not more:
            match = (self._startfuncreg.search(line) or
                     self._startfuncreg_2.search(line))
            if match is None:
                raise ValueError('no _() call found -- '
                                 '(file %s, line %s)' % (filename, lineno))
            # both patterns match from the start of the line, so slicing
            # drops exactly the text up to and including the '_('
            line = line[match.end():].strip()
        if not line:
            raise ValueError('missing string literal in function body -- '
                             '(file %s, line %s)' % (filename, lineno))
        quote = line[0]
        if quote not in ('"', "'"):
            raise ValueError('beginning of function body not a recognized '
                             'quote character: %s -- (file %s, line %s)' % (
                                 quote, filename, lineno))
        ret = []
        escaped = False  # True when the previous char was an active '\'
        pos = 1
        end = len(line)
        while pos < end:
            char = line[pos]
            pos += 1
            if char == quote and not escaped:
                break
            # escape sequences are kept verbatim in the result
            ret.append(char)
            escaped = (char == '\\') and not escaped
        else:
            # ran off the end of the line without seeing the closing quote
            raise ValueError('string literal not terminated -- '
                             '(file %s, line %s)' % (filename, lineno))

        # find out if we should continue after this (do we have a '+'
        # or a ');'?)
        more = False
        rest = line[pos:].strip()
        if rest.startswith('+'):
            if rest[1:].strip():
                raise ValueError('string concatenation only allowed for '
                                 'multiline strings, not for variable '
                                 'interpolation (use ${} instead) -- '
                                 '(file %s, line %s)' % (
                                     filename, lineno))
            more = True
        return ''.join(ret), more
200:
if __name__ == '__main__':
    # Progress messages go to stderr, so stdout carries only the XML.
    # write() is used instead of the print statement so that the script
    # runs unchanged on Python 2 and Python 3.
    stderr.write('POX extract v0.1\n')
    stderr.write('(c) Guido Wesdorp 2004\n')
    files = sys.argv[1:]
    stderr.write('Going to parse files %s\n' % ', '.join(files))
    pox = POX()
    # everything that is not a .js file is treated as XML
    xml = [f for f in files if not f.endswith('.js')]
    js = [f for f in files if f.endswith('.js')]
    XMLParser(xml, pox)
    JSParser(js, pox)
    # Declare the i18n namespace and domain on the root element itself;
    # patching the serialised text for '<catalog>' has no effect when the
    # catalog is empty and serialises as '<catalog/>'.
    pox.root.setAttribute('xmlns:i18n',
                          'http://xml.zope.org/namespaces/i18n')
    pox.root.setAttribute('i18n:domain', 'kupu')
    pres = pox.get_result()
    sys.stdout.write(pres + '\n')
    stderr.write('Done\n')
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>