"""Methods for language technologies."""
import logging
import os.path
import xml.parsers

from Globals import package_home
from OFS.Folder import Folder
from OFS.SimpleItem import SimpleItem
from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile
from Products.PageTemplates.PageTemplateFile import PageTemplateFile

logger = logging.getLogger("ECHO_content.ECHO_language")


class ECHO_language:
    """Language methods.

    Uses ``self.getPage`` and ``self.baseUri``, neither of which is defined
    in this module, so the class is presumably meant to be mixed into one
    that provides them.
    """

    def tagLex(self, nr="1", id=None):
        """Return the converted XML that DonatusFile produces for one page.

        nr -- passed to ``self.getPage`` as ``_pn``
        id -- passed to ``self.getPage`` as ``_id``
        """
        df = DonatusFile(txt=self.getPage(_pn=nr, _id=id),
                         baseUri=self.baseUri)
        return df.convertedXML()


class Collection(SimpleItem):
    # A list of link entries stored under one id.  Every entry is a dict
    # with the optional keys 'master' and 'slave', each holding a
    # (fn, id) tuple.
    #
    # NOTE(review): this class had no docstring and none is added here.
    # ZPublisher only publishes objects that carry a docstring, so adding
    # one could expose the methods below by URL - confirm before changing.

    def __init__(self, id):
        """Initialise an empty collection with the given id."""
        self.id = id
        self.entries = []

    def getEntries(self):
        """Return the list of entries."""
        return self.entries

    def deleteEntry(self, nr):
        """Delete the entry at index nr.

        nr may be an int or a numeric string (request parameters arrive as
        strings unless they are explicitly marshalled).
        Raises IndexError if there is no such entry and ValueError if nr
        is not numeric.
        """
        del self.entries[int(nr)]
        # The list is mutated in place, which the persistence machinery
        # cannot detect; flag the object so the change is saved.
        self._p_changed = 1

    def appendEntry(self, fn, id, type):
        """Store (fn, id) as the 'master' or 'slave' half of an entry.

        A new entry is started when there is none yet or when the last one
        already has both halves; otherwise the last entry is filled in.
        Any type other than "master" or "slave" stores nothing (but may
        still start a new, empty entry).
        """
        logger.debug("appendEntry fn=%r id=%r type=%r", fn, id, type)
        entries = self.entries
        last_complete = bool(entries) and bool(
            entries[-1].get('master') and entries[-1].get('slave'))
        if not entries or last_complete:
            entries.append({})

        entry = entries[-1]
        if type == "master":
            entry['master'] = (fn, id)
        elif type == "slave":
            entry['slave'] = (fn, id)

        # In-place mutation of a plain list/dict: mark the object changed.
        self._p_changed = 1


class ECHO_linkCreator(Folder):
    """Creator for links."""

    meta_type = "ECHO_linkCreator"

    def _getCollection(self, collection, create=False):
        """Return the object stored under the name collection, or None.

        With create=True a new Collection is added under that name when
        the lookup finds nothing.
        """
        col = getattr(self, collection, None)
        if not col and create:
            self._setObject(collection, Collection(collection))
            col = getattr(self, collection)
        return col

    # Returns the entries of the named collection, or [] when it does not
    # exist.  NOTE(review): kept without a docstring as in the original,
    # because a docstring would make the method publishable by URL.
    def getCollectionEntries(self, collection):
        col = self._getCollection(collection)
        if not col:
            return []
        return col.getEntries()

    def index_html(self, collection=None):
        """Show the page for creating links in the named collection.

        Returns the text "no collection" when no name is given.  A
        collection that does not exist yet is rendered with empty URLs.
        """
        if not collection:
            return "no collection"

        template_path = os.path.join(package_home(globals()), 'zpt',
                                     'ECHO_linkCreator_main')
        pt = PageTemplateFile(template_path).__of__(self)

        # getattr with a default also covers a missing collection (None).
        col = self._getCollection(collection)
        masterUrl = getattr(col, 'masterUrl', '')
        slaveUrl = getattr(col, 'slaveUrl', '')

        return pt(collection=collection, masterUrl=masterUrl,
                  slaveUrl=slaveUrl)

    def addEntry(self, collection, fn, id, type, fromurl=None, RESPONSE=None):
        """Add an entry, creating the collection on first use.

        Redirects to fromurl when both fromurl and RESPONSE are given.
        """
        col = self._getCollection(collection, create=True)
        col.appendEntry(fn, id, type)

        # NOTE(review): fromurl comes straight from the request and is
        # used as a redirect target without any check.
        if fromurl and RESPONSE:
            RESPONSE.redirect(fromurl)

    def removeEntry(self, collection, nr, RESPONSE=None):
        """Remove entry number nr from the named collection.

        Does nothing when the collection does not exist.
        """
        col = self._getCollection(collection)
        if col:
            col.deleteEntry(nr)

        if RESPONSE:
            RESPONSE.redirect(self.absolute_url() + "?collection=" + collection)

    def setUrls(self, collection, masterUrl, slaveUrl, RESPONSE=None):
        """Set the urls for the document viewer on the named collection.

        The collection is created when it does not exist yet, as addEntry
        does.
        """
        col = self._getCollection(collection, create=True)
        setattr(col, 'masterUrl', masterUrl)
        setattr(col, 'slaveUrl', slaveUrl)

        if RESPONSE:
            RESPONSE.redirect(self.absolute_url() + "?collection=" + collection)


def manage_addECHO_linkCreatorForm(self, RESPONSE=None):
    """Form for adding: adds the link creator directly, there are no options."""
    manage_addECHO_linkCreator(self, RESPONSE)


def manage_addECHO_linkCreator(self, RESPONSE=None):
    """Add an ECHO_linkCreator under the fixed id 'linkCreator'."""
    id = 'linkCreator'
    self._setObject(id, ECHO_linkCreator(id))

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')