version 1.1, 2004/11/24 15:17:41
|
version 1.5, 2006/09/10 22:57:38
|
Line 1
|
Line 1
|
"""Methoden fuer Language Technologies""" |
"""Methoden fuer Language Technologies""" |
import xml.parsers

from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile


def donatus(txt2):
    """Send text to the Donatus XML-RPC service and return its morphology data.

    The text is encoded as UTF-8, wrapped in an XML-RPC binary payload and
    passed to the remote ``donatus.analyze`` method.

    Args:
        txt2: The text to analyse; must provide ``encode('utf-8')``.

    Returns:
        The ``data`` attribute of the ``'morphData'`` entry in the
        service response.
    """
    # Imported locally, as in the original; ``xmlrpclib`` is the Python 2
    # name of the XML-RPC client module.
    import xmlrpclib

    server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc")

    # Named ``payload`` rather than ``bin`` to avoid shadowing the builtin.
    payload = xmlrpclib.Binary(txt2.encode('utf-8'))

    ret = server.donatus.analyze(payload)
    return ret['morphData'].data


class ECHO_language:
    """language methods"""

    def tagLex(self, nr="1"):
        """Generate word tags for one page.

        Args:
            nr: Page number, forwarded to ``self.getPage`` as ``_pn``.
                ``getPage`` is not defined in this class; it is presumably
                supplied by a class this one is mixed into -- TODO confirm.

        Returns:
            The result of ``DonatusFile(...).convertedXML()`` for the page
            text.
        """
        # The page is fetched once; the original fetched it into an unused
        # local and then fetched it a second time for DonatusFile.
        df = DonatusFile(txt=self.getPage(_pn=nr))
        return df.convertedXML()
|
|
def donatusVariant2Lemma(morphData):
    """Build a mapping from variant form to the lemma forms it belongs to.

    Every ``<lemma>`` element in the document is scanned for ``<variant>``
    descendants. The ``form`` attribute of each variant becomes a key, and
    the ``form`` attribute of the enclosing lemma is appended to that key's
    list.

    Args:
        morphData: XML document as a string, parsed with
            ``xml.dom.minidom.parseString``.

    Returns:
        dict mapping each variant ``form`` to a list of lemma ``form``
        values, in document order. A variant listed under several lemmas
        maps to all of them.
    """
    # Imported locally so the function does not rely on a module-level
    # import of xml.dom.minidom being present.
    import xml.dom.minidom

    ret = {}
    dom = xml.dom.minidom.parseString(morphData)

    for lemma in dom.getElementsByTagName('lemma'):
        lemma_form = lemma.getAttribute('form')
        for variant in lemma.getElementsByTagName('variant'):
            # The original assigned to ``ret[atr].append`` instead of calling
            # it, which raised AttributeError on the first repeated variant.
            ret.setdefault(variant.getAttribute('form'), []).append(lemma_form)

    return ret
|