version 1.3, 2005/10/26 08:35:53
|
version 1.4, 2006/09/10 11:03:07
|
Line 1
|
Line 1
|
"""Methoden fuer Language Technologies""" |
"""Methoden fuer Language Technologies""" |
|
|
|
|
|
from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile |
|
|
|
import xml.parsers |
|
|
def donatus(txt2): |
def donatus(txt2): |
import xmlrpclib |
import xmlrpclib |
|
|
Line 39 class ECHO_language:
|
Line 44 class ECHO_language:
|
"""analyze by donatus""" |
"""analyze by donatus""" |
return donatusVariant2Lemma(donatus(self.lemmatize(nr))) |
return donatusVariant2Lemma(donatus(self.lemmatize(nr))) |
|
|
|
|
def tagLex(self,nr="1"): |
def tagLex(self,nr="1"): |
|
"""gerateLinks""" |
|
txt=self.getPage(_pn=nr) |
|
|
|
df=DonatusFile(txt=self.getPage(_pn=nr)) |
|
|
|
return df.wordsToLinks() |
|
#return DonatusFile(txt=self.getPage(_pn=nr)).convertedXML() |
|
|
|
def tagLex_old(self,nr="1"): |
"""generate Links""" |
"""generate Links""" |
global retLex |
global retLex |
global toggle |
global toggle |
Line 124 class ECHO_language:
|
Line 139 class ECHO_language:
|
def insertW(str): |
def insertW(str): |
splitted=str.split() |
splitted=str.split() |
wordlist=["<w>%s</w>"%split for split in splitted] |
wordlist=["<w>%s</w>"%split for split in splitted] |
return string.join(wordlist,'\n') |
return "\n".join(wordlist) |
|
|
# 3 handler functions |
# 3 handler functions |
def start_element(name, attrs): |
def start_element(name, attrs): |