--- ECHO_content/ECHO_language.py 2004/11/24 15:17:41 1.1 +++ ECHO_content/ECHO_language.py 2006/09/11 14:43:23 1.6 @@ -1,32 +1,21 @@ """Methoden fuer Language Technologies""" -def donatus(txt2): - import xmlrpclib - server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc") - txt=txt2.encode('utf-8') - bin=xmlrpclib.Binary(txt) +from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile - +import xml.parsers - ret=server.donatus.analyze(bin) - - - return ret['morphData'].data - - -def donatusVariant2Lemma(morphData): - """creates hash variant -> morphdata""" - ret={} - dom=xml.dom.minidom.parseString(morphData) - lemmas=dom.getElementsByTagName('lemma') - for lemma in lemmas: - variants=lemma.getElementsByTagName('variant') - for variant in variants: - atr=variant.getAttribute('form') - if ret.has_key(atr): - ret[atr].append=lemma.getAttribute('form') - else: - ret[atr]=[lemma.getAttribute('form')] - - return ret +class ECHO_language: + """language methods""" + + + def tagLex(self,nr="1"): + """gerateword tags""" + txt=self.getPage(_pn=nr) + + df=DonatusFile(txt=self.getPage(_pn=nr),baseUri=self.baseUri) + + return df.convertedXML() + #return DonatusFile(txt=self.getPage(_pn=nr)).convertedXML() + + \ No newline at end of file