--- ECHO_content/ECHO_language.py 2005/10/26 08:35:53 1.3
+++ ECHO_content/ECHO_language.py 2006/10/11 16:55:26 1.8
@@ -1,153 +1,168 @@
"""Methoden fuer Language Technologies"""
-def donatus(txt2):
- import xmlrpclib
-
- server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc")
-
- txt=txt2.encode('utf-8')
- bin=xmlrpclib.Binary(txt)
-
-
-
- ret=server.donatus.analyze(bin)
-
-
- return ret['morphData'].data
-
-
-def donatusVariant2Lemma(morphData):
- """creates hash variant -> morphdata"""
- ret={}
- dom=xml.dom.minidom.parseString(morphData)
- lemmas=dom.getElementsByTagName('lemma')
- for lemma in lemmas:
- variants=lemma.getElementsByTagName('variant')
- for variant in variants:
- atr=variant.getAttribute('form')
- if ret.has_key(atr):
- ret[atr].append=lemma.getAttribute('form')
- else:
- ret[atr]=[lemma.getAttribute('form')]
-
- return ret
+from Products.PageTemplates.PageTemplateFile import PageTemplateFile
+from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile
+from OFS.SimpleItem import SimpleItem
+from OFS.Folder import Folder
+import xml.parsers
+import os.path
+from Globals import package_home
class ECHO_language:
"""language methods"""
- def donatusVariant2Lemma(self,nr='1'):
- """analyze by donatus"""
- return donatusVariant2Lemma(donatus(self.lemmatize(nr)))
-
- def tagLex(self,nr="1"):
- """generate Links"""
- global retLex
- global toggle
-
- toggle=0
- retLex=""
-
- lemmatized=self.lemmatize(nr)[0:]
- #print "ho",repr(lemmatized)
- variants=donatusVariant2Lemma(donatus(lemmatized))
-
- def createTag(name,attrs):
- global toggle
-
- if name=="w":
- toggle=1
- return ""
- else:
- tag="<"
- tag+=name
- for attr in attrs.keys():
- tag+=""" %s="%s" """%(attr,attrs[attr])
- tag+=">"
- return tag
-
- def createData(data):
- global toggle
- astring="""%s """
- if toggle: # tag war ein w
- toggle=0
- if variants.has_key(data):
- return astring%(variants[data][0],data)
- else:
- return astring%(data,data)
-
-
-
- # 3 handler functions
- def start_element(name, attrs):
- global retLex
-
- retLex+=createTag(name,attrs)
- def end_element(name):
- global retLex
- if not name=="w":
- retLex+="%s>"%(name.encode('utf-8'))
-
-
- def char_data(data):
- global retLex
- if data:
- try:
- retLex+=createData(data)
- except:
- """no"""
-
- p = xml.parsers.expat.ParserCreate()
-
- p.StartElementHandler = start_element
- p.EndElementHandler = end_element
- p.CharacterDataHandler = char_data
-
- p.Parse(lemmatized.encode('utf-8'),1)
- #print repr(lemmatized.encode('utf-8'))
- return retLex
+    def tagLex(self,nr="1",id=None):
+        """Return the converted XML that DonatusFile produces for a page.
+
+        nr and id are handed to self.getPage as _pn and _id; the page text
+        and self.baseUri are passed on to DonatusFile.
+        """
+        pageText=self.getPage(_pn=nr,_id=id)
+        return DonatusFile(txt=pageText,baseUri=self.baseUri).convertedXML()
-
- def lemmatize(self,nr='1',lang="de"):
- """lemmatize"""
- global ret
- ret=""
-
- def createTag(name,attrs):
- tag="<"
- tag+=name
- for attr in attrs.keys():
- tag+=""" %s="%s" """%(attr,attrs[attr])
- tag+=">"
- return tag
-
- def insertW(str):
- splitted=str.split()
- wordlist=["%s"%split for split in splitted]
- return string.join(wordlist,'\n')
-
- # 3 handler functions
- def start_element(name, attrs):
- global ret
- ret+=createTag(name,attrs)
- def end_element(name):
- global ret
- ret+="%s>"%(name.encode('utf-8'))
-
- def char_data(data):
- global ret
- ret+=insertW(data)
-
- p = xml.parsers.expat.ParserCreate()
-
- p.StartElementHandler = start_element
- p.EndElementHandler = end_element
- p.CharacterDataHandler = char_data
-
- p.Parse(self.getPage(nr), 1)
- txt="""
-
- """
- ret=txt%(lang,ret)
-
- return ret
+class Collection(SimpleItem):
+    # Holds a list of link entries. Each entry is a dict that may carry the
+    # keys 'master' and 'slave', each mapping to a tuple (fn, id, pagelink).
+
+    def __init__(self,id):
+        """Store the object id and start with an empty entry list."""
+        self.id=id
+        self.entries=[]
+
+    def getEntries(self):
+        """Return the entry list, padding short master/slave tuples.
+
+        Entries written before the pagelink field existed hold tuples with
+        fewer than three items; they are rewritten as (fn, id, '').
+        """
+        for entry in self.entries:
+            # Backward compatibility with the old two-item tuples.
+            # NOTE(review): the padding changes the entry dicts in place on
+            # every read and is presumably not written back to the ZODB.
+            for key in ('master','slave'):
+                if key in entry and len(entry[key])<3:
+                    entry[key]=(entry[key][0],entry[key][1],'')
+
+        return self.entries
+
+    def deleteEntry(self,nr):
+        """Delete the entry at index nr.
+
+        nr may be an int or a numeric string (values taken from a web
+        request arrive as strings). Raises IndexError when nr is out of
+        range and ValueError when it is not numeric.
+        """
+        # Work on a copy and rebind self.entries, as appendEntry does;
+        # in-place changes to a plain list are not registered by the ZODB
+        # persistence machinery, so the deletion could otherwise get lost.
+        entries=self.entries[0:]
+        del entries[int(nr)]
+        self.entries=entries
+
+    def appendEntry(self,fn,id,type,pagelink):
+        """Store (fn, id, pagelink) as the master or slave part of an entry.
+
+        A new entry is started when there is none yet or when the last one
+        already has both a master and a slave; otherwise the last entry is
+        filled in (or overwritten for the given type). Any type other than
+        "master" or "slave" stores nothing.
+        """
+        entries=self.entries[0:]
+
+        # Start a new entry if there is none yet or the last one is complete.
+        if not entries or (entries[-1].get('master',None) and entries[-1].get('slave',None)):
+            entries.append({})
+
+        if type in ('master','slave'):
+            entries[-1][type]=(fn,id,pagelink)
+
+        # Rebinding the attribute makes the change visible to persistence.
+        self.entries=entries
+
+class ECHO_linkCreator(Folder):
+    """creator for links"""
+
+    meta_type="ECHO_linkCreator"
+
+    def _getCollection(self,collection,create=False):
+        # Look up the Collection stored under the given name. With
+        # create=True a missing collection is created first; otherwise
+        # None is returned when it does not exist.
+        col=getattr(self,collection,None)
+        if not col and create:
+            self._setObject(collection,Collection(collection))
+            col=getattr(self,collection)
+        return col
+
+    def getCollectionEntries(self,collection):
+        # Returns the entries of the named collection, or [] when there is
+        # no such collection.
+        # Documented with comments only: a docstring would make this method
+        # publishable through ZPublisher, which the original did not allow.
+        col=self._getCollection(collection)
+        if not col:
+            return []
+
+        return col.getEntries()
+
+    def index_html(self,collection=None):
+        """Render the link creation page for the named collection.
+
+        Returns a hint text when no collection name is given. The template
+        receives the collection name and the masterUrl/slaveUrl stored on
+        the collection ('' when unset).
+        """
+        if not collection:
+            return "no collection: need parameter collection=COLLECTION_NAME"
+
+        pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','ECHO_linkCreator_main')).__of__(self)
+
+        col=self._getCollection(collection)
+        if not col:
+            # NOTE(review): an unknown collection yields an empty list
+            # instead of a page; kept as is, confirm this is intended.
+            return []
+
+        masterUrl=getattr(col,'masterUrl','')
+        slaveUrl=getattr(col,'slaveUrl','')
+
+        return pt(collection=collection,masterUrl=masterUrl,slaveUrl=slaveUrl)
+
+    def addEntry(self,collection,fn,id,type,pagelink,fromurl=None,RESPONSE=None,REQUEST=None):
+        """Add a master or slave entry to the collection, creating it if needed.
+
+        When both fromurl and RESPONSE are given, the response is marked as
+        not cacheable and redirected to fromurl.
+        """
+        col=self._getCollection(collection,create=True)
+        col.appendEntry(fn,id,type,pagelink)
+
+        if fromurl and RESPONSE:
+            # The original called DateTime() here without importing it,
+            # which raised NameError. The standard library is used instead
+            # to build an HTTP date one day in the past.
+            # NOTE: %a and %b follow the process locale.
+            import time
+            expires=time.strftime("%a, %d %b %Y %H:%M:%S GMT",time.gmtime(time.time()-86400))
+            RESPONSE.setHeader("Expires",expires)
+            RESPONSE.setHeader("Cache-Control", "no-cache")
+            RESPONSE.redirect(fromurl)
+
+    def removeEntry(self,collection,nr,RESPONSE=None):
+        """Remove entry number nr from the collection.
+
+        nr may be an int or a numeric string. An unknown collection is
+        ignored instead of raising AttributeError. With RESPONSE the
+        browser is sent back to the page of this collection.
+        """
+        col=self._getCollection(collection)
+        if col:
+            col.deleteEntry(int(nr))
+
+        if RESPONSE:
+            RESPONSE.redirect(self.absolute_url()+"?collection="+collection)
+
+    def setUrls(self,collection,masterUrl,slaveUrl,RESPONSE=None):
+        """Set the urls for the document viewer on the collection.
+
+        A missing collection is created first, as addEntry does, so the
+        call no longer fails with AttributeError on None.
+        """
+        col=self._getCollection(collection,create=True)
+        col.masterUrl=masterUrl
+        col.slaveUrl=slaveUrl
+
+        if RESPONSE:
+            RESPONSE.redirect(self.absolute_url()+"?collection="+collection)
+
+def manage_addECHO_linkCreatorForm(self,RESPONSE=None):
+    """Add the link creator right away; no form is rendered here."""
+    # Delegates completely to manage_addECHO_linkCreator.
+    manage_addECHO_linkCreator(self,RESPONSE=RESPONSE)
+
+def manage_addECHO_linkCreator(self,RESPONSE=None):
+    """Add an ECHO_linkCreator under the fixed id 'linkCreator'.
+
+    When RESPONSE is given the browser is redirected to manage_main.
+    """
+    newId='linkCreator'
+    creator=ECHO_linkCreator(newId)
+    self._setObject(newId,creator)
+
+    if RESPONSE is None:
+        return
+    RESPONSE.redirect('manage_main')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file