![]() ![]() | ![]() |
new module with language technologies
"""Methods for language technologies."""


def donatus(txt2):
    """Send text to the Donatus XML-RPC service and return its morphology data.

    Args:
        txt2: Text to analyze; it is UTF-8 encoded before being sent.

    Returns:
        The ``data`` payload of the ``morphData`` entry in the server's
        response to ``donatus.analyze``.
    """
    # Imported locally (as the original did); fall back to the Python 3
    # module name when the Python 2 one is unavailable.
    try:
        import xmlrpclib
    except ImportError:
        import xmlrpc.client as xmlrpclib

    server = xmlrpclib.ServerProxy(
        "http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc")

    # The text is wrapped as an XML-RPC binary value before transmission.
    payload = xmlrpclib.Binary(txt2.encode('utf-8'))
    ret = server.donatus.analyze(payload)

    return ret['morphData'].data


def donatusVariant2Lemma(morphData):
    """Create a mapping from variant form to the lemma forms it belongs to.

    Args:
        morphData: XML string containing ``lemma`` elements, each with a
            ``form`` attribute and ``variant`` descendants that also carry
            a ``form`` attribute.

    Returns:
        A dict mapping each variant ``form`` to a list of the ``form``
        attributes of every lemma under which that variant occurs, in
        document order.
    """
    # Local import: the module did not import xml.dom.minidom anywhere,
    # so the original raised NameError on the first call.
    import xml.dom.minidom

    ret = {}
    dom = xml.dom.minidom.parseString(morphData)
    for lemma in dom.getElementsByTagName('lemma'):
        lemma_form = lemma.getAttribute('form')
        for variant in lemma.getElementsByTagName('variant'):
            # The original assigned to ``list.append`` instead of calling
            # it, which raised AttributeError for any variant shared by
            # more than one lemma.
            ret.setdefault(variant.getAttribute('form'), []).append(lemma_form)

    return ret