--- ECHO_content/ECHO_collection.py 2004/08/09 10:10:56 1.162 +++ ECHO_content/ECHO_collection.py 2004/08/17 15:22:14 1.163 @@ -48,6 +48,38 @@ import xml.dom.minidom from ECHO_graphicalOverview import javaHandler,javaScriptMain import ECHO_helpers +def donatus(txt2): + import xmlrpclib + + server = xmlrpclib.ServerProxy("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc") + + txt=txt2.encode('utf-8') + bin=xmlrpclib.Binary(txt) + + + + ret=server.donatus.analyze(bin) + + + return ret['morphData'].data + + +def donatusVariant2Lemma(morphData): + """creates hash variant -> morphdata""" + ret={} + dom=xml.dom.minidom.parseString(morphData) + lemmas=dom.getElementsByTagName('lemma') + for lemma in lemmas: + variants=lemma.getElementsByTagName('variant') + for variant in variants: + atr=variant.getAttribute('form') + if ret.has_key(atr): + ret[atr].append=lemma.getAttribute('form') + else: + ret[atr]=[lemma.getAttribute('form')] + + return ret + #regexp for extracting elements from xml patternTXT=r"<\s*txt.*?>(.*?)" regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL) @@ -281,7 +313,7 @@ class BrowserCheck: def writeMetadata(url,metadict,project=None,startpage=None,xslt=None,thumbtemplate=None,topbar=None,digiLibTemplate=None,xmlfrag=None,digiliburlprefix=None): """Einlesen der Metadaten und und erstellen des geaenderten XML file""" - print "url",url + def updateTextToolNode(tag,value): #print dom,tag,value metanode=dom.getElementsByTagName('texttool')[0] @@ -310,7 +342,7 @@ def writeMetadata(url,metadict,project=N else: try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -392,7 +424,7 @@ def readMetadata(url): metadict={} try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -584,7 +616,171 @@ def manage_addECHO_layoutTemplate(self, REQUEST.RESPONSE.redirect(u+'/manage_main') return '' +class ECHO_fullText(ZopePageTemplate): + """echo fulltext in xml""" + + meta_type="ECHO_fullText" + + def donatusVariant2Lemma(self,nr='1'): + """analyze by donatus""" + return donatusVariant2Lemma(donatus(self.lemmatize(nr))) + + def tagLex(self,nr="1"): + """generate Links""" + global retLex + global toggle + + toggle=0 + retLex="" + + lemmatized=self.lemmatize(nr)[0:] + #print "ho",repr(lemmatized) + variants=donatusVariant2Lemma(donatus(lemmatized)) + + def createTag(name,attrs): + global toggle + + if name=="w": + toggle=1 + return "" + else: + tag="<" + tag+=name + for attr in attrs.keys(): + tag+=""" %s="%s" """%(attr,attrs[attr]) + tag+=">" + return tag + + def createData(data): + global toggle + astring="""%s """ + if toggle: # tag war ein w + toggle=0 + if variants.has_key(data): + return astring%(variants[data][0],data) + else: + return astring%(data,data) + + + # 3 handler functions + def start_element(name, attrs): + global retLex + print name + retLex+=createTag(name,attrs) + def end_element(name): + global retLex + if not name=="w": + retLex+=""%(name.encode('utf-8')) + + + def char_data(data): + global retLex + if data: + try: + retLex+=createData(data) + except: + """no""" + + p = xml.parsers.expat.ParserCreate() + + p.StartElementHandler = start_element + p.EndElementHandler = end_element + p.CharacterDataHandler = char_data + + p.Parse(lemmatized.encode('utf-8'),1) + print repr(lemmatized.encode('utf-8')) + + return retLex + + + def lemmatize(self,nr='1',lang="de"): + """lemmatize""" + global ret + ret="" + + def createTag(name,attrs): + tag="<" + tag+=name + for attr in attrs.keys(): + tag+=""" %s="%s" """%(attr,attrs[attr]) + tag+=">" + return tag + + def insertW(str): + splitted=str.split() + wordlist=["%s"%split for split in splitted] + return string.join(wordlist,'\n') + + # 3 handler functions + def start_element(name, attrs): + global ret + ret+=createTag(name,attrs) + def end_element(name): + global ret + ret+=""%(name.encode('utf-8')) + + def char_data(data): + global ret + ret+=insertW(data) + + p = xml.parsers.expat.ParserCreate() + + p.StartElementHandler = start_element + p.EndElementHandler = end_element + p.CharacterDataHandler = char_data + + p.Parse(self.getPage(nr), 1) + txt=""" +
%s
+
""" + ret=txt%(lang,ret) + + return ret + + def getPage(self,nr='1'): + """get page n""" + dom=xml.dom.minidom.parseString(self()) + pages=dom.getElementsByTagName('page') + + return pages[int(nr)-1].toxml('utf-8') + +# Product registration and Add support +manage_addECHO_fullTextForm = PageTemplateFile( + 'zpt/AddECHO_fullText.zpt', globals()) + +from urllib import quote + +def manage_addECHO_fullText(self, id, title=None, text=None, + REQUEST=None, submit=None): + "Add a Page Template with optional file content." + + id = str(id) + if REQUEST is None: + self._setObject(id, ECHO_fullText(id, text)) + ob = getattr(self, id) + if title: + ob.pt_setTitle(title) + return ob + else: + file = REQUEST.form.get('file') + headers = getattr(file, 'headers', None) + if headers is None or not file.filename: + zpt = ECHO_fullText(id) + else: + zpt = ECHO_fullText(id, file, headers.get('content_type')) + + self._setObject(id, zpt) + + try: + u = self.DestinationURL() + except AttributeError: + u = REQUEST['URL1'] + + if submit == " Add and Edit ": + u = "%s/%s" % (u, quote(id)) + REQUEST.RESPONSE.redirect(u+'/manage_main') + return '' class ECHO_resource(Folder,Persistent): """ECHO Ressource""" @@ -688,7 +884,7 @@ class ECHO_resource(Folder,Persistent): try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -718,10 +914,10 @@ class ECHO_resource(Folder,Persistent): params="accessright=%s"%accessright - #print urllib.urlopen(self.absolute_url()+'/setAccessRightXML'+'?'+params).read() + #print ECHO_helpers.urlopen(self.absolute_url()+'/setAccessRightXML'+'?'+params).read() - urllib.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/setAccessRightXML'+urllib.quote('?'+params))).read() + ECHO_helpers.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/setAccessRightXML'+urllib.quote('?'+params))).read() if RESPONSE is not None: @@ -734,7 +930,7 @@ class ECHO_resource(Folder,Persistent): try: geturl="" - for line in urllib.urlopen(url).readlines(): + for line in ECHO_helpers.urlopen(url).readlines(): geturl=geturl+line @@ -810,7 +1006,7 @@ class ECHO_resource(Folder,Persistent): params="startpage=%s"%startpage - urllib.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() + ECHO_helpers.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() path=self.metalink @@ -826,7 +1022,7 @@ class ECHO_resource(Folder,Persistent): path=re.sub('/index.meta','',path) - urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() + ECHO_helpers.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -866,7 +1062,7 @@ class ECHO_resource(Folder,Persistent): params="project=%s&xslt=%s&thumbtemplate=%s&topbar=%s&digiLibTemplate=%s&digiliburlprefix=%s"%(project,xslt,thumbtemplate,topbar,digiLibTemplate,digiliburlprefix) - urllib.urlopen('http://echo.mpiwg-berlin.mpg.de/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() + ECHO_helpers.urlopen('http://echo.mpiwg-berlin.mpg.de/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML'+urllib.quote('?'+params))).read() #print self.absolute_url()+'/newMetaXML'+urllib.quote'?'+params) # hack Pfad auf die Dokumente @@ -886,7 +1082,7 @@ class ECHO_resource(Folder,Persistent): path=re.sub('http://foxridge.rz-berlin.mpg.de:8080','',path) # falls foxridge als server path=re.sub('http://foxridge.rz-berlin.mpg.de','',path) # falls foxridge als server path=re.sub('http://content.mpiwg-berlin.mpg.de','',path) # falls content als server - return urllib.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() + return ECHO_helpers.urlopen("http://nausikaa2.rz-berlin.mpg.de:86/cgi-bin/toc/admin/reg.cgi?path=%s"%path).readlines() if RESPONSE is not None: RESPONSE.redirect('manage_main') @@ -921,7 +1117,7 @@ class ECHO_resource(Folder,Persistent): def getFullTextXML(self,noredirect=None): """getFullTextXML; gives the FullText as an XML Document, and if somthing goes wrong.""" try: - fh=urllib.urlopen(self.metalink) + fh=ECHO_helpers.urlopen(self.metalink) dom=xml.dom.minidom.parse(fh) texttools=dom.getElementsByTagName('texttool') text=texttools[0].getElementsByTagName('text') @@ -944,7 +1140,7 @@ class ECHO_resource(Folder,Persistent): def getImageView(self,noredirect=None): """getImages; give Imageviewr and if somthing goes wrong.""" try: - fh=urllib.urlopen(self.metalink) + fh=ECHO_helpers.urlopen(self.metalink) dom=xml.dom.minidom.parse(fh) texttools=dom.getElementsByTagName('texttool') text=texttools[0].getElementsByTagName('image') @@ -1193,7 +1389,7 @@ class ECHO_resource(Folder,Persistent): except: """nothing""" - urllib.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML')).read() + ECHO_helpers.urlopen('http://xserve02.mpiwg-berlin.mpg.de:18880/echo_nav/storage/downloadExternalXML?index_meta_url=%s&xml_url=%s'%(self.metalink,self.absolute_url()+'/newMetaXML')).read() if RESPONSE is not None: @@ -1743,7 +1939,13 @@ class ECHO_collection(Folder, Persistent pt=PageTemplateFile('Products/ECHO_content/zpt/changeTitleForm').__of__(self) pt.content_type="text/html" return pt() - + + def changeWeights(self): + """change form""" + pt=PageTemplateFile('Products/ECHO_content/zpt/changeWeightForm').__of__(self) + pt.content_type="text/html" + return pt() + def changeMetaDataLinks(self): """change form""" pt=PageTemplateFile('Products/ECHO_content/zpt/changeMetaDataLinkForm').__of__(self) @@ -1779,7 +1981,7 @@ class ECHO_collection(Folder, Persistent """not""" return ret - def changeLabelsInCollection(self): + def changeMetaDataLinkInCollection(self): """change all lables of a collection""" ret="" argv=self.REQUEST.form @@ -1789,7 +1991,22 @@ class ECHO_collection(Folder, Persistent try: ret+=resource[1].getId()+" "+argv[resource[1].getId()]+"
" - resource[1].label=argv[resource[1].getId()][0:] + resource[1].metalink=argv[resource[1].getId()][0:] + except: + """not""" + return ret + + def changeWeightsInCollection(self): + """change all lables of a collection""" + ret="" + argv=self.REQUEST.form + + resources=self.ZopeFind(self,obj_metatypes=['ECHO_pageTemplate','ECHO_resource','ECHO_collection','ECHO_link','ECHO_externalLink']) + for resource in resources: + + try: + ret+=resource[1].getId()+" "+argv[resource[1].getId()]+"
" + resource[1].weight=argv[resource[1].getId()][0:] except: """not""" return ret @@ -2245,6 +2462,7 @@ class ECHO_collection(Folder, Persistent {'label':'Main Config','action':'ECHO_collection_config'}, {'label':'Change Labels','action':'changeLabels'}, {'label':'Change Titles','action':'changeTitles'}, + {'label':'Change Weights','action':'changeWeights'}, {'label':'Rerender Labels and Titles','action':'ECHO_rerenderLinksMDWarning'}, {'label':'Graphics','action':'ECHO_graphicEntry'}, {'label':'create resources from XML','action':'createRessourcesFromXMLForm'}, @@ -2717,11 +2935,33 @@ class ECHO_root(Folder,Persistent,Implic security=ClassSecurityInfo() meta_type="ECHO_root" + + def changeWeightsInCollection(self): + """change all lables of a collection""" + ret="" + argv=self.REQUEST.form + + resources=self.ZopeFind(self,obj_metatypes=['ECHO_resource','ECHO_collection','ECHO_link','ECHO_externalLink']) + for resource in resources: + + try: + ret+=resource[1].getId()+" "+argv[resource[1].getId()]+"
" + resource[1].weight=argv[resource[1].getId()][0:] + except: + """not""" + return ret + def changeWeights(self): + """change form""" + pt=PageTemplateFile('Products/ECHO_content/zpt/changeWeightForm').__of__(self) + pt.content_type="text/html" + return pt() + getSubCols = ECHO_helpers.getSubCols manage_options=Folder.manage_options+( {'label':'Main Config','action':'ECHO_copyright_configForm'}, {'label':'Reload Metadata','action':'reloadMetaDataFromStorageWarning'}, + {'label':'Change Weights','action':'changeWeights'}, ) @@ -2769,7 +3009,7 @@ class ECHO_root(Folder,Persistent,Implic def showContent(self,path): """return content/html""" - return urllib.urlopen(path+"/content_html").read() + return ECHO_helpers.urlopen(path+"/content_html").read() def getImageViewers(self): """images""" @@ -3068,6 +3308,7 @@ class ECHO_root(Folder,Persistent,Implic def xml2html(self,str,quote="yes"): """link2html fuer VLP muss hier noch raus""" + print str if str: if quote=="yes2": str=re.sub("\&","&",str) @@ -3081,8 +3322,9 @@ class ECHO_root(Folder,Persistent,Implic if self.checkRef(ref): link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref) - str= dom.toxml('utf-8') - #print str + str= dom.toxml() + + #print link.toxml('utf-8') retStr=regexpPage.search(str) return retStr.group(1)