Annotation of kupu/tools/makepox.py, revision 1.1
1.1 ! dwinter 1: """Simple script to generate .pox files
! 2:
! 3: parses XML for i18n attrs and JS files for _() calls and generates an
! 4: XML .pox template document (.poxt file)
! 5:
! 6: (c) Guido Wesdorp 2005
! 7:
! 8: """
! 9:
! 10: from xml.dom.minidom import parseString, getDOMImplementation
! 11: import sys, re, os
! 12:
# Stream that receives progress messages and parse warnings, so that the
# generated catalog written to stdout stays clean.
stderr = sys.stderr

# When true, a file that cannot be parsed as XML is reported on stderr;
# either way the file is skipped rather than aborting the run.
warn_on_broken_xml = True
! 16:
class POX:
    """Container for the results: a DOM holding one <message> per msgid.

    The document root is a <catalog> element.  Each distinct message id
    gets a single <message> child whose 'filenames' attribute lists, space
    separated, every file the id was found in.
    """

    def __init__(self):
        """Create an empty catalog document."""
        impl = getDOMImplementation()
        self.doc = impl.createDocument(None, 'catalog', None)
        self.root = self.doc.documentElement
        self.processed = {} # mapping from msgid to ([filenames], node)

    def add(self, msgid, filename):
        """Register msgid as occurring in filename.

        The id is normalized first: surrounding whitespace is stripped,
        newlines and tabs become spaces and runs of spaces are collapsed to
        a single space.  A msgid that was seen before only has its
        'filenames' attribute extended (once per file); a new msgid gets a
        fresh <message> element with <msgid> and <msgstr> children, the
        latter pre-filled with the id itself.
        """
        # strip and reduce whitespace
        msgid = msgid.strip().replace('\n', ' ').replace('\t', ' ')
        # str.replace returns a new string; the result has to be assigned
        # back or this loop can never terminate
        while '  ' in msgid:
            msgid = msgid.replace('  ', ' ')

        if msgid in self.processed:
            filenames, node = self.processed[msgid]
            if filename not in filenames:
                filenames.append(filename)
                node.setAttribute('filenames', ' '.join(filenames))
            return

        doc = self.doc
        # build the complete <message> subtree, then attach it once
        msgnode = doc.createElement('message')
        msgnode.setAttribute('filenames', filename)

        msgidnode = doc.createElement('msgid')
        msgidnode.appendChild(doc.createTextNode(msgid))
        msgnode.appendChild(msgidnode)

        msgstrnode = doc.createElement('msgstr')
        msgstrnode.appendChild(doc.createTextNode(msgid))
        msgstrnode.setAttribute('i18n:translate', '')
        msgnode.appendChild(msgstrnode)

        self.root.appendChild(msgnode)
        self.processed[msgid] = ([filename], msgnode)

    def get_result(self):
        """Return the catalog document serialized as pretty-printed XML."""
        return self.doc.toprettyxml()
! 55:
class XMLParser:
    """Scan XML files (or well-formed HTML files) for i18n attributes.

    For every element with an 'i18n:translate' attribute, and for every
    attribute named in an 'i18n:attributes' list, one message id is passed
    to pox.add(msgid, filename).
    """

    def __init__(self, files, pox):
        """Parse every file in files immediately, feeding results to pox."""
        self._current = None  # node most recently returned by next_node()
        for filename in files:
            self.parse_file(filename, pox)

    def parse_file(self, filename, pox):
        """Parse one file and add all message ids found in it to pox.

        A file that cannot be read or parsed is skipped; a message is
        written to stderr when warn_on_broken_xml is true.

        Raises AttributeError when 'i18n:attributes' names an attribute
        that the element does not have.
        """
        fp = open(filename)
        try:
            try:
                dom = parseString(fp.read())
            except Exception:
                exc, e = sys.exc_info()[:2]
                if warn_on_broken_xml:
                    stderr.write('Error parsing %s: %s - %s' % (
                                    filename, exc, e) + '\n')
                return
        finally:
            # always release the file handle, also when parsing failed
            fp.close()

        # walk through all the nodes and scan for i18n: stuff
        while 1:
            node = self.next_node(dom)
            if node is None:
                break
            if node.nodeType != node.ELEMENT_NODE:
                continue
            attrs = node.attributes

            translate = attrs.getNamedItem('i18n:translate')
            if translate is not None:
                msgid = translate.value
                if not msgid.strip():
                    # no explicit id given: the element content is the id
                    msgid = self.extract_text(node)
                pox.add(msgid, filename)

            attributes = attrs.getNamedItem('i18n:attributes')
            if attributes is not None:
                for attr in attributes.value.split(';'):
                    attr = attr.strip()
                    if not attr:
                        # empty entry, e.g. produced by a trailing ';'
                        continue
                    attritem = attrs.getNamedItem(attr)
                    if attritem is None:
                        raise AttributeError('No %s on %s in %s' % (
                                        attr, node.nodeName, filename))
                    pox.add(attritem.value, filename)

    def extract_text(self, node):
        """Return the serialized children of node as a single line.

        Every child is serialized with toxml() and stripped, newlines and
        tabs become spaces, the pieces are concatenated and runs of spaces
        are collapsed to one space.
        """
        # NOTE(review): stripping each child separately also removes the
        # whitespace between text and inline elements ('a <b>c</b>' yields
        # 'a<b>c</b>'); kept as is because it determines the generated ids.
        pieces = [child.toxml().strip().replace('\n', ' ').replace('\t', ' ')
                    for child in node.childNodes]
        xml = ''.join(pieces)
        while '  ' in xml:
            xml = xml.replace('  ', ' ')
        return xml

    def next_node(self, dom):
        """Return the node following the current one in document order.

        The first call for a given document returns its document element.
        Returns None once the whole tree has been visited; the call after
        that starts again at the document element.
        """
        cur = self._current
        if cur is None or cur.ownerDocument is not dom:
            following = dom.documentElement
        elif cur.hasChildNodes():
            following = cur.firstChild
        else:
            # No children: continue with the nearest following sibling,
            # climbing as many levels as needed.  Looking only at the
            # parent's sibling would end the walk too early whenever the
            # parent is a last child itself.
            following = None
            while cur is not None:
                if cur.nextSibling is not None:
                    following = cur.nextSibling
                    break
                cur = cur.parentNode
        self._current = following
        return following
! 119:
class JSParser:
    """Scan JS files for _() calls and register their string literals.

    Only a quoted string literal is accepted as argument of _().  A literal
    may be continued on following lines by ending the line with '+'.  At
    most one _() call per line is processed.
    """

    def __init__(self, files, pox):
        """Parse every file in files immediately, feeding results to pox."""
        for filename in files:
            self.parse_file(filename, pox)

    # '_(' preceded by a character that cannot be part of an identifier
    _startfuncreg = re.compile(r'.*?[^a-zA-Z0-9_]_\(')
    # '_(' at the very start of the line
    _startfuncreg_2 = re.compile(r'^_\(')

    def parse_file(self, filename, pox):
        """Add the literal of every _() call found in filename to pox.

        Raises ValueError when a call does not contain a (non-empty)
        string literal.
        """
        fp = open(filename)
        try:
            lines = fp.readlines()
        finally:
            fp.close()
        lineno = 0
        more = False
        chunks = []
        for line in lines:
            lineno += 1
            # a call may also start in the first column, where there is no
            # preceding character for _startfuncreg to match on
            if (more or self._startfuncreg.search(line) or
                    self._startfuncreg_2.search(line)):
                chunk, more = self._get_func_content(line, filename,
                                                        lineno, more)
                chunks.append(chunk)
            if chunks and not more:
                literal = ''.join(chunks).strip()
                if not literal:
                    raise ValueError('Unrecognized function content -- '
                                        'file %s, line %s' % (
                                            filename, lineno))
                literal = literal.replace('\t', ' ').replace('\n', ' ')
                while '  ' in literal:
                    literal = literal.replace('  ', ' ')
                chunks = []
                pox.add(literal, filename)

    def _get_func_content(self, line, filename, lineno, more=False):
        """return the content of the _() call in line

        if more is True, this will assume the function is already opened
        and continue adding to the result from the start of the line
        without searching for the start of the call first

        returns a tuple (content, more) where more is True if the end of
        the function body is not reached, in that case this method should
        be called again with the 'more' argument set to True

        raises ValueError if no call is found, if the argument does not
        start with a quote character, if the string literal is not closed
        on this line or if '+' is followed by anything but the line end
        """
        line = line.strip()
        if not more:
            # Try the start-of-line form first: the lazy prefix of
            # _startfuncreg could otherwise run on to a later '_('.
            match = self._startfuncreg_2.match(line) or \
                        self._startfuncreg.search(line)
            if match is None:
                raise ValueError('no _() call found -- '
                                    '(file %s, line %s)' % (
                                        filename, lineno))
            # cut off the prefix only; removing every occurrence would also
            # damage literals that contain the same text
            line = line[match.end():]
        line = line.strip()
        quote = line[:1]  # empty string when nothing follows the '('
        if quote not in ['"', "'"]:
            raise ValueError('beginning of function body not a recognized '
                                'quote character: %s -- (file %s, line %s)' % (
                                    quote, filename, lineno))
        body = line[1:]
        ret = []
        escaped = False  # True when the previous char is an escaping backslash
        end = None
        for index, char in enumerate(body):
            if char == quote and not escaped:
                end = index
                break
            ret.append(char)
            # a backslash that is itself escaped does not escape what follows
            escaped = (char == '\\') and not escaped
        if end is None:
            raise ValueError('end of string literal not found -- '
                                '(file %s, line %s)' % (
                                    filename, lineno))

        # find out if we should continue after this (do we have a '+'
        # or a ');'?)
        more = False
        rest = body[end + 1:].strip()
        if rest.startswith('+'):
            if rest[1:].strip():
                raise ValueError('string concatenation only allowed for '
                                    'multiline strings, not for variable '
                                    'interpolation (use ${} instead) -- '
                                    '(file %s, line %s)' % (
                                        filename, lineno))
            more = True
        return ''.join(ret), more
! 200:
if __name__ == '__main__':
    # Command line entry point: every argument is a file to scan.  Files
    # ending in '.js' go to the JS parser, all others to the XML parser.
    # The resulting catalog is written to stdout, progress goes to stderr.
    def _report(*parts):
        # one space separated line on stderr
        stderr.write(' '.join(parts) + '\n')

    _report('POX extract v0.1')
    _report('(c) Guido Wesdorp 2004')
    files = sys.argv[1:]
    _report('Going to parse files', ', '.join(files))

    pox = POX()
    js = [name for name in files if name.endswith('.js')]
    xml = [name for name in files if not name.endswith('.js')]
    XMLParser(xml, pox)
    JSParser(js, pox)

    # the catalog root is created without attributes; add the i18n
    # namespace declaration and domain to the serialized start tag
    catalog_tag = ('<catalog xmlns:i18n="http://xml.zope.org/namespaces/i18n" '
                    'i18n:domain="kupu">')
    pres = pox.get_result().replace('<catalog>', catalog_tag)
    sys.stdout.write(pres + '\n')
    _report('Done')
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>