version 1.3, 2005/10/26 08:35:53
|
version 1.7, 2006/09/14 14:31:53
|
Line 1
|
Line 1
|
"""Methoden fuer Language Technologies""" |
"""Methoden fuer Language Technologies""" |
|
|
def donatus(txt2): |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
import xmlrpclib |
from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile |
|
from OFS.SimpleItem import SimpleItem |
|
from OFS.Folder import Folder |
|
import xml.parsers |
|
import os.path |
|
from Globals import package_home |
|
|
server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc") |
class ECHO_language: |
|
"""language methods""" |
|
|
txt=txt2.encode('utf-8') |
|
bin=xmlrpclib.Binary(txt) |
|
|
|
|
def tagLex(self,nr="1",id=None): |
|
"""gerateword tags""" |
|
|
|
|
ret=server.donatus.analyze(bin) |
df=DonatusFile(txt=self.getPage(_pn=nr,_id=id),baseUri=self.baseUri) |
|
|
|
return df.convertedXML() |
|
#return DonatusFile(txt=self.getPage(_pn=nr)).convertedXML() |
|
|
return ret['morphData'].data |
class Collection(SimpleItem): |
|
|
|
def __init__(self, id):
    """Create an empty collection stored under the given id."""
    # No entries yet; each entry is added later as a dict.
    self.entries = []
    self.id = id
|
|
def donatusVariant2Lemma(morphData):
    """Build a lookup table: variant form -> list of lemma forms.

    ``morphData`` is parsed as an XML document.  For every ``<variant>``
    element found below a ``<lemma>`` element, the lemma's ``form``
    attribute is recorded under the variant's ``form`` attribute.

    Returns a dict mapping each variant form to the list of lemma forms
    it was found under, in document order.  A variant that occurs under
    several lemmas gets one list entry per occurrence.

    Malformed XML makes ``xml.dom.minidom.parseString`` raise; that
    exception is passed on to the caller.
    """
    # Imported locally: the visible file-level imports only bring in
    # ``xml.parsers``, not ``xml.dom.minidom``.
    import xml.dom.minidom

    ret = {}
    dom = xml.dom.minidom.parseString(morphData)

    for lemma in dom.getElementsByTagName('lemma'):
        lemmaForm = lemma.getAttribute('form')
        for variant in lemma.getElementsByTagName('variant'):
            # setdefault creates the list on first sight of a variant and
            # appends otherwise (the old code assigned to ``.append``
            # instead of calling it).
            ret.setdefault(variant.getAttribute('form'), []).append(lemmaForm)

    return ret
def getEntries(self):
    """Return the list of entries held by this collection."""
    # The stored list itself is handed out, not a copy.
    entries = self.entries
    return entries
|
|
class ECHO_language: |
def deleteEntry(self,nr): |
"""language methods""" |
"""delete an entry""" |
|
del(self.entries[nr]) |
|
|
def donatusVariant2Lemma(self,nr='1'): |
|
"""analyze by donatus""" |
def appendEntry(self,fn,id,type): |
return donatusVariant2Lemma(donatus(self.lemmatize(nr))) |
"""append an entry""" |
|
#check if last entry is complete |
def tagLex(self,nr="1"): |
createNew=False |
"""generate Links""" |
print "Here",fn,id,type |
global retLex |
if len(self.entries)==0: #noch gar kein Eintrag |
global toggle |
createNew=True |
|
|
toggle=0 |
|
retLex="" |
|
|
|
lemmatized=self.lemmatize(nr)[0:] |
|
#print "ho",repr(lemmatized) |
|
variants=donatusVariant2Lemma(donatus(lemmatized)) |
|
|
|
def createTag(name,attrs): |
|
global toggle |
|
|
|
if name=="w": |
|
toggle=1 |
|
return "" |
|
else: |
|
tag="<" |
|
tag+=name |
|
for attr in attrs.keys(): |
|
tag+=""" %s="%s" """%(attr,attrs[attr]) |
|
tag+=">" |
|
return tag |
|
|
|
def createData(data): |
|
global toggle |
|
astring="""<a href="http://141.14.236.86/cgi-bin/toc/dict?step=remotetable;word=%s;lang=de" target="_blank">%s</a> """ |
|
if toggle: # tag war ein w |
|
toggle=0 |
|
if variants.has_key(data): |
|
return astring%(variants[data][0],data) |
|
else: |
else: |
return astring%(data,data) |
entry=self.entries[-1] |
|
if entry.get('master',None) and entry.get('slave',None): |
|
createNew=True |
|
if createNew: |
|
|
|
self.entries.append({}) |
|
entry=self.entries[-1] |
|
if type=="master": |
|
entry['master']=(fn,id) |
|
elif type=="slave": |
|
entry['slave']=(fn,id) |
|
|
|
self.entries[-1]=entry |
|
|
|
class ECHO_linkCreator(Folder):
    """Folder that keeps named Collection objects and offers methods to
    list, add and remove their entries and to store viewer URLs."""

    meta_type = "ECHO_linkCreator"

    def getCollectionEntries(self, collection):
        # Returns the entries of the named collection, or an empty list
        # if no such collection exists.
        # NOTE(review): this method has no docstring in the original.
        # Zope's publisher only exposes objects that have one, so a
        # comment is used here to avoid changing that -- confirm intent.
        col = getattr(self, collection, None)
        if not col:
            return []
        return col.getEntries()

    def index_html(self, collection=None):
        """show create links"""
        if not collection:
            return "no collection"

        col = getattr(self, collection, None)
        if not col:
            # Unknown collection: kept as in the original (empty list).
            return []

        # Only build the template once we know there is something to show.
        pt = PageTemplateFile(os.path.join(package_home(globals()), 'zpt', 'ECHO_linkCreator_main')).__of__(self)

        masterUrl = getattr(col, 'masterUrl', '')
        slaveUrl = getattr(col, 'slaveUrl', '')

        return pt(collection=collection, masterUrl=masterUrl, slaveUrl=slaveUrl)

    def addEntry(self, collection, fn, id, type, fromurl=None, RESPONSE=None):
        """add an entry"""
        col = getattr(self, collection, None)
        if not col:
            # First entry for this name: create the collection on demand.
            self._setObject(collection, Collection(collection))
            col = getattr(self, collection)

        col.appendEntry(fn, id, type)

        if fromurl and RESPONSE:
            RESPONSE.redirect(fromurl)

    def removeEntry(self, collection, nr, RESPONSE=None):
        """remove an entry"""
        col = getattr(self, collection, None)
        if col is not None:
            # nr is used as a list index; int() also accepts it when it
            # arrives as a string and leaves real integers unchanged.
            col.deleteEntry(int(nr))

        if RESPONSE:
            RESPONSE.redirect(self.absolute_url() + "?collection=" + collection)

    def setUrls(self, collection, masterUrl, slaveUrl, RESPONSE=None):
        """set the urls for the document viewer"""
        col = getattr(self, collection, None)
        if col is not None:
            # A missing collection is skipped instead of failing on None,
            # in line with how the other methods treat unknown names.
            setattr(col, 'masterUrl', masterUrl)
            setattr(col, 'slaveUrl', slaveUrl)

        if RESPONSE:
            RESPONSE.redirect(self.absolute_url() + "?collection=" + collection)
|
|
|
|
|
#self.index_html(collection) |
|
|
# 3 handler functions |
def manage_addECHO_linkCreatorForm(self,RESPONSE=None): |
def start_element(name, attrs): |
"""Form for adding""" |
global retLex |
manage_addECHO_linkCreator(self,RESPONSE) |
|
|
retLex+=createTag(name,attrs) |
def manage_addECHO_linkCreator(self,RESPONSE=None): |
def end_element(name): |
"""Add an ECHO_main""" |
global retLex |
id='linkCreator' |
if not name=="w": |
self._setObject(id,ECHO_linkCreator(id)) |
retLex+="</%s>"%(name.encode('utf-8')) |
|
|
|
|
|
def char_data(data): |
if RESPONSE is not None: |
global retLex |
RESPONSE.redirect('manage_main') |
if data: |
|
try: |
|
retLex+=createData(data) |
|
except: |
|
"""no""" |
|
|
|
p = xml.parsers.expat.ParserCreate() |
|
|
|
p.StartElementHandler = start_element |
|
p.EndElementHandler = end_element |
|
p.CharacterDataHandler = char_data |
|
|
|
p.Parse(lemmatized.encode('utf-8'),1) |
|
#print repr(lemmatized.encode('utf-8')) |
|
|
|
return retLex |
|
|
|
|
|
def lemmatize(self,nr='1',lang="de"):
    """Wrap every word of a page in ``<w>`` tags.

    The page text is fetched with ``self.getPage(nr)`` and parsed as XML.
    Element tags are copied to the output; each whitespace-separated word
    of character data is emitted as ``<w>word</w>``.  The result is placed
    inside a ``<wtag>``/``<section lang=...>``/``<s>`` envelope.

    nr   -- page identifier handed to ``self.getPage``
    lang -- value written to the ``lang`` attribute of ``<section>``

    Returns the assembled markup as a string.  Parse errors raised by
    expat are passed on to the caller.
    """
    # Local imports: the visible file-level imports do not provide these.
    import xml.parsers.expat
    from xml.sax.saxutils import escape

    # Per-call accumulator; a module-level global would be shared between
    # concurrent calls.
    pieces = []

    def createTag(name,attrs):
        # Rebuild the start tag; attribute values are escaped again
        # because expat hands them over already decoded.
        tag = "<" + name
        for attr in attrs.keys():
            tag += """ %s="%s" """ % (attr, escape(attrs[attr], {'"': '&quot;'}))
        return tag + ">"

    def insertW(text):
        # One <w> element per word, one word per line.
        return '\n'.join(["<w>%s</w>" % escape(word) for word in text.split()])

    # 3 handler functions
    def start_element(name, attrs):
        pieces.append(createTag(name, attrs))

    def end_element(name):
        pieces.append("</%s>" % name)

    def char_data(data):
        pieces.append(insertW(data))

    p = xml.parsers.expat.ParserCreate()
    # Deliver each run of text in one piece, so that a word containing a
    # character reference is not split into several <w> elements.
    p.buffer_text = True

    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data

    p.Parse(self.getPage(nr), 1)

    txt = """<wtag locator="xxx">
<section lang="%s"><s>%s</s></section>
</wtag>"""

    return txt % (lang, "".join(pieces))
|