--- ECHO_content/ECHO_language.py 2005/10/26 08:35:53 1.3
+++ ECHO_content/ECHO_language.py 2007/01/09 17:01:01 1.11
@@ -1,153 +1,255 @@
"""Methoden fuer Language Technologies"""
-def donatus(txt2):
- import xmlrpclib
-
- server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc")
-
- txt=txt2.encode('utf-8')
- bin=xmlrpclib.Binary(txt)
-
-
-
- ret=server.donatus.analyze(bin)
-
-
- return ret['morphData'].data
-
-
-def donatusVariant2Lemma(morphData):
- """creates hash variant -> morphdata"""
- ret={}
- dom=xml.dom.minidom.parseString(morphData)
- lemmas=dom.getElementsByTagName('lemma')
- for lemma in lemmas:
- variants=lemma.getElementsByTagName('variant')
- for variant in variants:
- atr=variant.getAttribute('form')
- if ret.has_key(atr):
- ret[atr].append=lemma.getAttribute('form')
- else:
- ret[atr]=[lemma.getAttribute('form')]
-
- return ret
+from Products.PageTemplates.PageTemplateFile import PageTemplateFile
+from Products.ECHO_content.analyseAndTag.analyseAndTag import DonatusFile
+from OFS.SimpleItem import SimpleItem
+from OFS.Folder import Folder
+import xml.parsers
+import os.path
+import urlparse,urllib
+from Globals import package_home
class ECHO_language:
"""language methods"""
- def donatusVariant2Lemma(self,nr='1'):
- """analyze by donatus"""
- return donatusVariant2Lemma(donatus(self.lemmatize(nr)))
-
- def tagLex(self,nr="1"):
- """generate Links"""
- global retLex
- global toggle
-
- toggle=0
- retLex=""
-
- lemmatized=self.lemmatize(nr)[0:]
- #print "ho",repr(lemmatized)
- variants=donatusVariant2Lemma(donatus(lemmatized))
-
- def createTag(name,attrs):
- global toggle
-
- if name=="w":
- toggle=1
- return ""
- else:
- tag="<"
- tag+=name
- for attr in attrs.keys():
- tag+=""" %s="%s" """%(attr,attrs[attr])
- tag+=">"
- return tag
-
- def createData(data):
- global toggle
- astring="""%s """
- if toggle: # tag war ein w
- toggle=0
- if variants.has_key(data):
- return astring%(variants[data][0],data)
- else:
- return astring%(data,data)
-
-
-
- # 3 handler functions
- def start_element(name, attrs):
- global retLex
-
- retLex+=createTag(name,attrs)
- def end_element(name):
- global retLex
- if not name=="w":
- retLex+="%s>"%(name.encode('utf-8'))
-
-
- def char_data(data):
- global retLex
- if data:
- try:
- retLex+=createData(data)
- except:
- """no"""
-
- p = xml.parsers.expat.ParserCreate()
-
- p.StartElementHandler = start_element
- p.EndElementHandler = end_element
- p.CharacterDataHandler = char_data
-
- p.Parse(lemmatized.encode('utf-8'),1)
- #print repr(lemmatized.encode('utf-8'))
- return retLex
+ def tagLex(self,nr="1",id=None):
+ """generate word tags"""
+
+
+ df=DonatusFile(txt=self.getPage(_pn=nr,_id=id),baseUri=self.baseUri)
+
+ return df.convertedXML()
+ #return DonatusFile(txt=self.getPage(_pn=nr)).convertedXML()
+
+class Collection(SimpleItem):
+ def getCollectionXML(self,RESPONSE=None):
+
+ """get collection as xml"""
+ return self.aq_parent.getCollectionXML(collection=self.getId(),RESPONSE=RESPONSE)
+
+ def __init__(self,id):
+ """initialise"""
+ self.id=id
+ self.entries=[]
+
+
+ def getEntries(self):
+ """get the entries"""
+ entries=self.entries
+ for entry in entries: #backward compatibility, cannot be removed a.s.a.p.
+ print entry
+ if entry.has_key('master') and (len(entry['master'])<3):
+ entry['master']=(entry['master'][0],entry['master'][1],'')
+ if entry.has_key('slave') and (len(entry['slave'])<3):
+ entry['slave']=(entry['slave'][0],entry['slave'][1],'')
+
+ return self.entries
+
+ def deleteEntry(self,nr):
+ """delete an entry"""
+ del(self.entries[nr])
+
+ def changeEntry(self,nr,slaveUrl,masterID):
+ """change an entry: slave url and master id"""
+ tmp=self.entries[nr]
+ tm=tmp['master']
+ tmp['slave']=(slaveUrl,"","")
+ tmp['master']=(tm[0],tm[1],masterID)
+ entries=self.entries[0:]
+ entries[nr]=tmp
+ self.entries=entries[0:]
+
+ def appendEntry(self,fn,id,type,pagelink):
+ """append an entry"""
+ #check if last entry is complete
+ createNew=False
+
+ if len(self.entries)==0: #no entry at all yet
+ createNew=True
+ else:
+ entry=self.entries[-1]
+ if entry.get('master',None) and entry.get('slave',None):
+ createNew=True
+ if createNew:
+
+ self.entries.append({})
+ entry=self.entries[-1]
+ if type=="master":
+ entry['master']=(fn,id,pagelink)
+ elif type=="slave":
+ entry['slave']=(fn,id,pagelink)
+
+ entries=self.entries[0:]
+ entries[-1]=entry
+ self.entries=entries[0:]
+
+class ECHO_linkCreator(Folder):
+ """creator for links"""
+
+ meta_type="ECHO_linkCreator"
+
+
+
+
+ def getCollectionEntries(self,collection):
+ col=getattr(self,collection,None)
+ if not col:
+ return []
+
+ return col.getEntries()
+
+ def getAllRefIDs(self,collection):
+ """return all refids"""
+ ret=[]
+ entries=self.getCollectionEntries(collection)
+
+ for entry in entries:
+ ret.append('_pagelink='+entry['master'][2])
+ return "&".join(ret)
+
+
+ def getCollectionXML(self,collection=None,RESPONSE=None):
+ """exports the collection as an XML file"""
+ if not collection:
+ return "no collection: need parameter collection=COLLECTION_NAME"
+
+ i=0
+ ret=""
+ ret+=""""""
+ ret+=""""""
+ ret+="""%s"""%collection
+ ret+=""""""%self.getUrls(collection)[0]
+ ret+=""""""%self.getUrls(collection)[1]
+
+ for entry in self.getCollectionEntries(collection):
+ ret+=""""""%i
+ i+=1
+
+ if entry.has_key('master'):
+ ms=entry['master']
+
+
+ try:
+ if urlparse.urlparse(ms[0])[0]=="http": # url
+ ret+=""""""%urllib.quote(ms[0])
+ else:
+ ret+=""""""
+ except: #without pagelink
+ ret+=""""""%ms
+ if entry.has_key('slave'):
+ ms=entry['slave']
+ try:
+ if urlparse.urlparse(ms[0])[0]=="http": # url
+ ret+=""""""%urllib.quote(ms[0])
+ else:
+ ret+=""""""%(ms[0],ms[1])
+ splitted=ms[2].split("/")
+ if (len(splitted)>3):
+ ret+=""""""%(splitted[0],splitted[3])
+ ret+=""""""
+ except: #without pagelink
+ ret+=""""""%ms
+
+ ret+=""
+ ret+=""""""
+ if RESPONSE:
+ RESPONSE.setHeader("Content-Type","text/xml")
+ return ret
+ def index_html(self,collection=None):
+ """show create links"""
+ if not collection:
+ return "no collection: need parameter collection=COLLECTION_NAME"
+
+
+ pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','ECHO_linkCreator_main')).__of__(self)
+
+ col=getattr(self,collection,None)
+ if not col:
+ return []
+
+ masterUrl=getattr(col,'masterUrl','')
+ slaveUrl=getattr(col,'slaveUrl','')
+
+ return pt(collection=collection,masterUrl=masterUrl,slaveUrl=slaveUrl)
+
+ def addEntry(self,collection,fn,id,type,pagelink,fromurl=None,RESPONSE=None,REQUEST=None):
+ """add an entry"""
-
- def lemmatize(self,nr='1',lang="de"):
- """lemmatize"""
- global ret
- ret=""
-
- def createTag(name,attrs):
- tag="<"
- tag+=name
- for attr in attrs.keys():
- tag+=""" %s="%s" """%(attr,attrs[attr])
- tag+=">"
- return tag
-
- def insertW(str):
- splitted=str.split()
- wordlist=["%s"%split for split in splitted]
- return string.join(wordlist,'\n')
-
- # 3 handler functions
- def start_element(name, attrs):
- global ret
- ret+=createTag(name,attrs)
- def end_element(name):
- global ret
- ret+="%s>"%(name.encode('utf-8'))
-
- def char_data(data):
- global ret
- ret+=insertW(data)
-
- p = xml.parsers.expat.ParserCreate()
-
- p.StartElementHandler = start_element
- p.EndElementHandler = end_element
- p.CharacterDataHandler = char_data
-
- p.Parse(self.getPage(nr), 1)
- txt="""
-
- """
- ret=txt%(lang,ret)
-
- return ret
+ col=getattr(self, collection,None)
+ if not col:
+ self._setObject(collection,Collection(collection))
+ col=getattr(self, collection)
+
+ col.appendEntry(fn,id,type,pagelink)
+
+ if fromurl and RESPONSE:
+
+ RESPONSE.setHeader("Expires",(DateTime()-1).rfc822())
+ RESPONSE.setHeader("Cache-Control", "no-cache")
+ RESPONSE.redirect(fromurl)
+
+
+ def changeEntry(self,collection,nr,slaveUrl,masterID,RESPONSE=None):
+ """change an entry (slaveUrl and masterID)"""
+ col=getattr(self, collection,None)
+ col.changeEntry(nr,slaveUrl,masterID)
+
+ if RESPONSE:
+ RESPONSE.redirect(self.absolute_url()+"?collection="+collection)
+
+ def removeEntry(self,collection,nr,RESPONSE=None):
+ """remove an entry"""
+ col=getattr(self, collection,None)
+ col.deleteEntry(nr)
+
+ if RESPONSE:
+ RESPONSE.redirect(self.absolute_url()+"?collection="+collection)
+
+ def setUrls(self,collection,masterUrl,slaveUrl,RESPONSE=None):
+ """set the urls for the document viewer"""
+ col=getattr(self, collection,None)
+ setattr(col,'masterUrl',masterUrl)
+ setattr(col,'slaveUrl',slaveUrl)
+
+ if RESPONSE:
+ RESPONSE.redirect(self.absolute_url()+"?collection="+collection)
+
+ def getUrls(self,collection,RESPONSE=None):
+ """get the urls for the document viewer"""
+ col=getattr(self, collection,None)
+ x=getattr(col,'masterUrl')
+ y=getattr(col,'slaveUrl')
+ return x,y
+
+def manage_addECHO_linkCreatorForm(self,RESPONSE=None):
+ """Form for adding"""
+ manage_addECHO_linkCreator(self,RESPONSE)
+
+def manage_addECHO_linkCreator(self,RESPONSE=None):
+ """Add an ECHO_linkCreator"""
+ id='linkCreator'
+ self._setObject(id,ECHO_linkCreator(id))
+
+
+ if RESPONSE is not None:
+ RESPONSE.redirect('manage_main')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file