File:  [Repository] / ECHO_content / ECHO_language.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Wed Nov 24 15:17:41 2004 UTC (19 years, 7 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
new module with language technologies

    1: """Methods for language technologies"""
    2: def donatus(txt2):
    3: 	import xmlrpclib
    4: 
    5: 	server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc")
    6: 
    7: 	txt=txt2.encode('utf-8')
    8: 	bin=xmlrpclib.Binary(txt)
    9: 
   10: 	
   11: 
   12: 	ret=server.donatus.analyze(bin)
   13: 
   14: 		
   15: 	return ret['morphData'].data
   16: 
   17: 
   18: def donatusVariant2Lemma(morphData):
   19: 	"""creates hash variant -> morphdata"""
   20: 	ret={}
   21: 	dom=xml.dom.minidom.parseString(morphData)
   22: 	lemmas=dom.getElementsByTagName('lemma')
   23: 	for lemma in lemmas:
   24: 		variants=lemma.getElementsByTagName('variant')
   25: 		for variant in variants:
   26: 			atr=variant.getAttribute('form')
   27: 			if ret.has_key(atr):
   28: 				ret[atr].append=lemma.getAttribute('form')
   29: 			else:
   30: 				ret[atr]=[lemma.getAttribute('form')]
   31: 
   32: 	return ret

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>