--- ECHO_content/ECHO_collection.py 2004/08/09 10:10:56 1.162 +++ ECHO_content/ECHO_collection.py 2004/08/27 22:57:57 1.165 @@ -48,6 +48,38 @@ import xml.dom.minidom from ECHO_graphicalOverview import javaHandler,javaScriptMain import ECHO_helpers +def donatus(txt2): + import xmlrpclib + + server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc") + + txt=txt2.encode('utf-8') + bin=xmlrpclib.Binary(txt) + + + + ret=server.donatus.analyze(bin) + + + return ret['morphData'].data + + +def donatusVariant2Lemma(morphData): + """creates hash variant -> morphdata""" + ret={} + dom=xml.dom.minidom.parseString(morphData) + lemmas=dom.getElementsByTagName('lemma') + for lemma in lemmas: + variants=lemma.getElementsByTagName('variant') + for variant in variants: + atr=variant.getAttribute('form') + if ret.has_key(atr): + ret[atr].append=lemma.getAttribute('form') + else: + ret[atr]=[lemma.getAttribute('form')] + + return ret + #regexp for extracting elements from xml patternTXT=r"<\s*txt.*?>(.*?)" regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL) @@ -238,6 +270,13 @@ def getText(nodelist): rc = rc + node.data return rc +def getTextFromNode(nodename): + nodelist=nodename.childNodes + rc = "" + for node in nodelist: + if node.nodeType == node.TEXT_NODE: + rc = rc + node.data + return rc def sendFile(self, filename, type): """sends an object or a local file (in the product) as response""" @@ -281,7 +320,7 @@ class BrowserCheck: def writeMetadata(url,metadict,project=None,startpage=None,xslt=None,thumbtemplate=None,topbar=None,digiLibTemplate=None,xmlfrag=None,digiliburlprefix=None): """Einlesen der Metadaten und und erstellen des geaenderten XML file""" - print "url",url + def updateTextToolNode(tag,value): #print dom,tag,value metanode=dom.getElementsByTagName('texttool')[0] @@ -310,7 +349,7 @@ def writeMetadata(url,metadict,project=N else: try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -392,7 +431,7 @@ def readMetadata(url): metadict={} try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -584,7 +623,171 @@ def manage_addECHO_layoutTemplate(self, REQUEST.RESPONSE.redirect(u+'/manage_main') return '' +class ECHO_fullText(ZopePageTemplate): + """echo fulltext in xml""" + + meta_type="ECHO_fullText" + def donatusVariant2Lemma(self,nr='1'): + """analyze by donatus""" + return donatusVariant2Lemma(donatus(self.lemmatize(nr))) + + def tagLex(self,nr="1"): + """generate Links""" + global retLex + global toggle + + toggle=0 + retLex="" + + lemmatized=self.lemmatize(nr)[0:] + #print "ho",repr(lemmatized) + variants=donatusVariant2Lemma(donatus(lemmatized)) + + def createTag(name,attrs): + global toggle + + if name=="w": + toggle=1 + return "" + else: + tag="<" + tag+=name + for attr in attrs.keys(): + tag+=""" %s="%s" """%(attr,attrs[attr]) + tag+=">" + return tag + + def createData(data): + global toggle + astring="""%s """ + if toggle: # tag war ein w + toggle=0 + if variants.has_key(data): + return astring%(variants[data][0],data) + else: + return astring%(data,data) + + + + # 3 handler functions + def start_element(name, attrs): + global retLex + + retLex+=createTag(name,attrs) + def end_element(name): + global retLex + if not name=="w": + retLex+=""%(name.encode('utf-8')) + + + def char_data(data): + global retLex + if data: + try: + retLex+=createData(data) + except: + """no""" + + p = xml.parsers.expat.ParserCreate() + + p.StartElementHandler = start_element + p.EndElementHandler = end_element + p.CharacterDataHandler = char_data + + p.Parse(lemmatized.encode('utf-8'),1) + #print repr(lemmatized.encode('utf-8')) + + return retLex + + + def lemmatize(self,nr='1',lang="de"): + """lemmatize""" + global ret + ret="" + + def createTag(name,attrs): + tag="<" + tag+=name + for attr in attrs.keys(): + tag+=""" %s="%s" """%(attr,attrs[attr]) + tag+=">" + return tag + + def insertW(str): + splitted=str.split() + wordlist=["%s"%split for split in splitted] + return string.join(wordlist,'\n') + + # 3 handler functions + def start_element(name, attrs): + global ret + ret+=createTag(name,attrs) + def end_element(name): + global ret + ret+=""%(name.encode('utf-8')) + + def char_data(data): + global ret + ret+=insertW(data) + + p = xml.parsers.expat.ParserCreate() + + p.StartElementHandler = start_element + p.EndElementHandler = end_element + p.CharacterDataHandler = char_data + + p.Parse(self.getPage(nr), 1) + txt=""" +
%s
+
""" + ret=txt%(lang,ret) + + return ret + + def getPage(self,nr='1'): + """get page n""" + dom=xml.dom.minidom.parseString(self()) + pages=dom.getElementsByTagName('page') + + return pages[int(nr)-1].toxml('utf-8') + +# Product registration and Add support +manage_addECHO_fullTextForm = PageTemplateFile( + 'zpt/AddECHO_fullText.zpt', globals()) + +from urllib import quote + +def manage_addECHO_fullText(self, id, title=None, text=None, + REQUEST=None, submit=None): + "Add a Page Template with optional file content." + + id = str(id) + if REQUEST is None: + self._setObject(id, ECHO_fullText(id, text)) + ob = getattr(self, id) + if title: + ob.pt_setTitle(title) + return ob + else: + file = REQUEST.form.get('file') + headers = getattr(file, 'headers', None) + if headers is None or not file.filename: + zpt = ECHO_fullText(id) + else: + zpt = ECHO_fullText(id, file, headers.get('content_type')) + + self._setObject(id, zpt) + + try: + u = self.DestinationURL() + except AttributeError: + u = REQUEST['URL1'] + + if submit == " Add and Edit ": + u = "%s/%s" % (u, quote(id)) + REQUEST.RESPONSE.redirect(u+'/manage_main') + return '' class ECHO_resource(Folder,Persistent): """ECHO Ressource""" @@ -601,7 +804,7 @@ class ECHO_resource(Folder,Persistent): """showrdf""" self.REQUEST.RESPONSE.setHeader('Content-Type','text/xml') ret="""\n\n""" - ret+=self.getRDF(urn="echo:collectionroot")+"\n" + ret+=self.getRDF(urn="echo:colllectionroot")+"\n" ret+="""""" return ret @@ -688,7 +891,7 @@ class ECHO_resource(Folder,Persistent): try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -718,10 +921,10 @@ class ECHO_resource(Folder,Persistent): params="accessright=%s"%accessright - #print urllib.urlopen(self.absolute_url()+'/setAccessRightXML'+'?'+params).read() + #print ECHO_helpers.urlopen(self.absolute_url()+'/setAccessRightXML'+'?'+params).read() - urllib.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/setAccessRightXML'+urllib.quote('?'+params))).read() + ECHO_helpers.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/setAccessRightXML'+urllib.quote('?'+params))).read() if RESPONSE is not None: @@ -734,7 +937,7 @@ class ECHO_resource(Folder,Persistent): try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -810,7 +1013,7 @@ class ECHO_resource(Folder,Persistent): params="startpage=%s"%startpage - urllib.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() + ECHO_helpers.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() path=self.metalink @@ -823,10 +1026,11 @@ class ECHO_resource(Folder,Persistent): path=re.sub('http://foxridge.rz-berlin.mpg.de:8080','',path) # falls foxridge als server path=re.sub('http://content.mpiwg-berlin.mpg.de','',path) # falls content als server path=re.sub('http://foxridge.rz-berlin.mpg.de','',path) # falls foxridge als server + path=re.sub('http://vision.rz-berlin.mpg.de','',path) # falls vision als server path=re.sub('/index.meta','',path) - urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() + ECHO_helpers.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -866,7 +1070,7 @@ class ECHO_resource(Folder,Persistent): params="project=%s&xslt=%s&thumbtemplate=%s&topbar=%s&digiLibTemplate=%s&digiliburlprefix=%s"%(project,xslt,thumbtemplate,topbar,digiLibTemplate,digiliburlprefix) - urllib.urlopen('http://echo.mpiwg-berlin.mpg.de/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() + ECHO_helpers.urlopen('http://echo.mpiwg-berlin.mpg.de/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() #print self.absolute_url()+'/newMetaXML'+urllib.quote'?'+params) # hack Pfad auf die Dokumente @@ -886,7 +1090,11 @@ class ECHO_resource(Folder,Persistent): path=re.sub('http://foxridge.rz-berlin.mpg.de:8080','',path) # falls foxridge als server path=re.sub('http://foxridge.rz-berlin.mpg.de','',path) # falls foxridge als server path=re.sub('http://content.mpiwg-berlin.mpg.de','',path) # falls content als server - return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() + + path=re.sub('http://vision.rz-berlin.mpg.de','',path) # falls vision als server + + return ECHO_helpers.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() + if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -921,7 +1129,7 @@ class ECHO_resource(Folder,Persistent): def getFullTextXML(self,noredirect=None): """getFullTextXML; gives the FullText as an XML Document, and if somthing goes wrong.""" try: - fh=urllib.urlopen(self.metalink) + fh=ECHO_helpers.urlopen(self.metalink) dom=xml.dom.minidom.parse(fh) texttools=dom.getElementsByTagName('texttool') text=texttools[0].getElementsByTagName('text') @@ -944,7 +1152,7 @@ class ECHO_resource(Folder,Persistent): def getImageView(self,noredirect=None): """getImages; give Imageviewr and if somthing goes wrong.""" try: - fh=urllib.urlopen(self.metalink) + fh=ECHO_helpers.urlopen(self.metalink) dom=xml.dom.minidom.parse(fh) texttools=dom.getElementsByTagName('texttool') text=texttools[0].getElementsByTagName('image') @@ -1193,7 +1401,7 @@ class ECHO_resource(Folder,Persistent): except: """nothing""" - urllib.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML')).read() + ECHO_helpers.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML')).read() if RESPONSE is not None: @@ -1743,7 +1951,13 @@ class ECHO_collection(Folder, Persistent pt=PageTemplateFile('Products/ECHO_content/zpt/changeTitleForm').__of__(self) pt.content_type="text/html" return pt() - + + def changeWeights(self): + """change form""" + pt=PageTemplateFile('Products/ECHO_content/zpt/changeWeightForm').__of__(self) + pt.content_type="text/html" + return pt() + def changeMetaDataLinks(self): """change form""" pt=PageTemplateFile('Products/ECHO_content/zpt/changeMetaDataLinkForm').__of__(self) @@ -1779,7 +1993,7 @@ class ECHO_collection(Folder, Persistent """not""" return ret - def changeLabelsInCollection(self): + def changeMetaDataLinkInCollection(self): """change all lables of a collection""" ret="" argv=self.REQUEST.form @@ -1789,7 +2003,22 @@ class ECHO_collection(Folder, Persistent try: ret+=resource[1].getId()+" "+argv[resource[1].getId()]+"
" - resource[1].label=argv[resource[1].getId()][0:] + resource[1].metalink=argv[resource[1].getId()][0:] + except: + """not""" + return ret + + def changeWeightsInCollection(self): + """change all lables of a collection""" + ret="" + argv=self.REQUEST.form + + resources=self.ZopeFind(self,obj_metatypes=['ECHO_pageTemplate','ECHO_resource','ECHO_collection','ECHO_link','ECHO_externalLink']) + for resource in resources: + + try: + ret+=resource[1].getId()+" "+argv[resource[1].getId()]+"
" + resource[1].weight=argv[resource[1].getId()][0:] except: """not""" return ret @@ -2245,6 +2474,7 @@ class ECHO_collection(Folder, Persistent {'label':'Main Config','action':'ECHO_collection_config'}, {'label':'Change Labels','action':'changeLabels'}, {'label':'Change Titles','action':'changeTitles'}, + {'label':'Change Weights','action':'changeWeights'}, {'label':'Rerender Labels and Titles','action':'ECHO_rerenderLinksMDWarning'}, {'label':'Graphics','action':'ECHO_graphicEntry'}, {'label':'create resources from XML','action':'createRessourcesFromXMLForm'}, @@ -2711,6 +2941,100 @@ def manage_addECHO_userFolderForm(self): """add a user folder form""" return manage_addECHO_userFolder(self) +def getEdges(seqs,urn): + """edges""" + ret=[] + #print urn,seqs + for seqsList in seqs[urn]: + + try: + for seq in seqsList: + ret+=seq.getElementsByTagName("RDF:li") + except: + ret+=seqsList.getElementsByTagName("RDF:li") + return ret + +def createSubs(self,seqs,descrs,urn): + """create subs""" + for edge in getEdges(seqs,urn): + cn=createNode(self,descrs[edge.getAttribute('RDF:resource')]) + if cn[0]=="CDLI_group": + #print "HHHHHH",cn[1],seqs,descrs,cn[2] + createSubs(cn[1],seqs,descrs,cn[2]) +def getEdges2(seqs,urn): + """edges""" + ret=[] + #print urn,seqs + + return seqs[urn] + +def createSubs2(self,seqs,descrs,urn,level=0): + """create subs""" + + for edge in getEdges2(seqs,urn): + print "urnXX",urn.encode('utf-8'),level + cn=createNode2(self,descrs,edge) + if cn[0]=="CDLI_group": + + createSubs2(cn[1],seqs,descrs,cn[2],level+1) + print "urnddonesubs",urn.encode('utf-8') + print "urndonenode",urn.encode('utf-8') + return + +def createNode2(self,descrs,node): + name=descrs[node]['name'] + type=descrs[node]['type'] + urn=node + print " will create",node.encode('utf-8') + id=re.sub('[^a-zA-Z0-9]','',name).encode('ascii','ignore') + #print "id",id + #print type + #self.REQUEST.RESPONSE.write("

%s

\n"%id) + if type=="CDLI_group": + + try: + manage_addECHO_collection(self,id,name,name,"","","","","") + + except: + self.REQUEST.RESPONSE.write("

Error%s

\n"%id) + + self.REQUEST.RESPONSE.write("

Creates:%s

\n"%getattr(self,id).absolute_url()) + + return type,getattr(self,id),urn + + if type=="CDLI_item": + try: + manage_addECHO_resource(self,id,name,name,"","",urn,"","") + except: + self.REQUEST.RESPONSE.write("

Error%s

\n"%id) + self.REQUEST.RESPONSE.write("

Creates:%s

\n"%getattr(self,id).absolute_url()) + + + return "XX" + +def createNode(self,nodes): + for node in nodes: + name=getTextFromNode(node.getElementsByTagName("ECHONAVIGATION:name")[0]) + type=getTextFromNode(node.getElementsByTagName("ECHONAVIGATION:type")[0]) + urn=node.getAttribute("RDF:about") + + id=re.sub('[^a-zA-Z0-9]','',name).encode('ascii','ignore') + #print "id",id + #print type + self.REQUEST.RESPONSE.write("

%s

\n"%id) + if type=="CDLI_group": + print self.getId() + + manage_addECHO_collection(self,id,name,name,"","","","","") + print "done::::",getattr(self,id) + return type,getattr(self,id),urn + + if type=="CDLI_item": + manage_addECHO_resource(self,id,name,name,"","",urn,"","") + return "XX" + + + class ECHO_root(Folder,Persistent,Implicit): """ECHO Root Folder""" @@ -2718,10 +3042,139 @@ class ECHO_root(Folder,Persistent,Implic meta_type="ECHO_root" + def getTablet(self,item): + read=urllib.urlopen("http://enlil.museum.upenn.edu/cgi-bin/cdlget.plx?item=%s&project=ncdl"%item).read() + + read=re.sub("\[search\]","search",read) + + return read[read.find("")+6:read.rfind("")] + + def generateFromRDF2(self): + """generate from RDF2""" + global seqs + seqs={} + global descrs + descrs={} + global key + key="" + global value + value="" + def start_element(name,attrs): + + global seqs + global descrs + global key + global value + seq="" + if name=="RDF:Seq": + key=attrs.get('RDF:about') + try: # teste ob liste + x=seqs[key][0] + except: + + seqs[key]=[] + + + elif name=="RDF:Description": + key=attrs.get('RDF:about') + + + elif name=="RDF:li": + name=attrs.get('RDF:resource') + seqs[key].append(name) + + elif name=="ECHONAVIGATION:type": + value="type" + + elif name=="ECHONAVIGATION:name": + value="name" + elif name=="ECHONAVIGATION:linkClickable": + value="linkClickable" + + def end_element(name): + """nothing""" + key="" + value="" + + def char_data(data): + """nothing""" + + data=re.sub("\n","",data) + try: + if descrs[key].has_key(value): + descrs[key][value]+=data + else: + descrs[key][value]=data + except: + + descrs[key]={} + descrs[key][value]=data + + p = xml.parsers.expat.ParserCreate() + + p.StartElementHandler = start_element + p.EndElementHandler = end_element + p.CharacterDataHandler = char_data + fileName="/Users/dwinter/Documents/Projekte/CDLI/test11.rdf" + + p.ParseFile(file(fileName)) + self.REQUEST.RESPONSE.write("

Start

") + createSubs2(self,seqs,descrs,"/Cuneiform Corpus") + self.REQUEST.RESPONSE.write("

done

") + print "done" + + + return "done" + + def generateFromRDF(self): + """generate Structure from rdf""" + dom=xml.dom.minidom.parse("/Users/dwinter/Documents/Projekte/CDLI/test10.rdf") + seqs={} + descrs={} + for seq in dom.getElementsByTagName("RDF:Seq"): + key=seq.getAttribute('RDF:about') + try: + seqs[key].append(seq) + except: + seqs[key]=[seq] + + + for descr in dom.getElementsByTagName("RDF:Description"): + key=descr.getAttribute('RDF:about') + try: + descrs[key].append(descr) + except: + descrs[key]=[descr] + createSubs(self,seqs,descrs,"echo:collectionroot") + + + + def changeWeightsInCollection(self): + """change all lables of a collection""" + ret="" + argv=self.REQUEST.form + + resources=self.ZopeFind(self,obj_metatypes=['ECHO_resource','ECHO_collection','ECHO_link','ECHO_externalLink']) + for resource in resources: + + try: + ret+=resource[1].getId()+" "+argv[resource[1].getId()]+"
" + resource[1].weight=argv[resource[1].getId()][0:] + except: + """not""" + return ret + + def changeWeights(self): + """change form""" + pt=PageTemplateFile('Products/ECHO_content/zpt/changeWeightForm').__of__(self) + pt.content_type="text/html" + return pt() + getSubCols = ECHO_helpers.getSubCols manage_options=Folder.manage_options+( {'label':'Main Config','action':'ECHO_copyright_configForm'}, {'label':'Reload Metadata','action':'reloadMetaDataFromStorageWarning'}, + {'label':'Change Weights','action':'changeWeights'}, ) @@ -2769,7 +3222,7 @@ class ECHO_root(Folder,Persistent,Implic def showContent(self,path): """return content/html""" - return urllib.urlopen(path+"/content_html").read() + return ECHO_helpers.urlopen(path+"/content_html").read() def getImageViewers(self): """images""" @@ -3068,6 +3521,7 @@ class ECHO_root(Folder,Persistent,Implic def xml2html(self,str,quote="yes"): """link2html fuer VLP muss hier noch raus""" + print str if str: if quote=="yes2": str=re.sub("\&","&",str) @@ -3081,8 +3535,9 @@ class ECHO_root(Folder,Persistent,Implic if self.checkRef(ref): link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref) - str= dom.toxml('utf-8') - #print str + str= dom.toxml() + + #print link.toxml('utf-8') retStr=regexpPage.search(str) return retStr.group(1)