# HG changeset patch
# User dwinter
# Date 1351781534 -3600
# Node ID caeede0c9464280a70afd77df659ad104b994ae6
# Parent  fb2a3b4542a40ef61ac20b6290f1aa61c648f788
update and redirector

diff -r fb2a3b4542a4 -r caeede0c9464 harvestToPurl.py
--- a/harvestToPurl.py	Wed Oct 31 21:54:55 2012 +0100
+++ b/harvestToPurl.py	Thu Nov 01 15:52:14 2012 +0100
@@ -12,8 +12,9 @@
 from os.path import join, getsize
 import sys
 import re
+from lxml import etree
 
-def harvestIndexMeta(path,user,delpath="",replacepath=""):
+def harvestIndexMeta(path,user,delpath="",replacepath="", update=False):
 
     md = manageIndexMetaPURLs.IndexMetaPURLManager()
 
@@ -22,15 +23,22 @@
 
         for name in files:
             if name.endswith(".meta"):
-
                 fl=join(root, name)
+                # image folder belonging to this index.meta ("" means: none)
+                imagePath=createImagePath(fl,root)
+                if imagePath: # keep "": with an empty delpath "^" would match and turn it into replacepath
+                    imagePath=re.sub("^"+delpath,replacepath,imagePath)
                 fl=re.sub("^"+delpath,replacepath,fl) #loesche den teil vom path der mir delpath beginnt
-                val,purl = md.register(fl, True, "", user)
+                val,purl = md.register(fl, True, user=user,imagePath=imagePath,update=update)
                 if val==manageIndexMetaPURLs.ALREADY_EXISTS:
                     print "found %s -> %s"%(fl,purl)
+
+                elif val==manageIndexMetaPURLs.UPDATED:
+                    print "updated %s -> %s"%(fl,purl)
                 else:
                     print "added %s -> %s"%(fl,purl)
 
+
         if 'pageimg' in dirs:
             dirs.remove('pageimg') # don't visit pageimf
         for dir in dirs:
@@ -40,7 +48,34 @@
                 dirs.remove(dir)
 
 
+def createImagePath(path,root):
+    """Return the image folder for an index.meta file, or "" if there is none.
+
+    path -- location of the index.meta file, parsed as XML
+    root -- directory in which the image folders are looked up
+
+    A document containing a texttool element gets no image path.
+    Otherwise the first existing folder out of "pageimg" and "pages"
+    below root is returned (heuristic).
+    """
+    tree= etree.parse(path)
+
+    # a texttool tag means: no imagePath
+    tt =tree.xpath('//texttool')
+    if len(tt)>0:
+        return ""
+
+    # otherwise use a heuristic
+    imageFolders=["pageimg","pages"]
+
+    for imageFolder in imageFolders:
+        fl=join(root, imageFolder)
+        if os.path.exists(fl): # does one of the folders exist?
+            return fl
+
+    return ""
+
 
 if __name__ == '__main__':
     args = sys.argv[1:]
 
@@ -64,6 +99,6 @@
         print "ERROR: path %s does not exist!"%path
         sys.exit(2)
 
-    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath)
+    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath,update=True)
 
     
\ No newline at end of file
diff -r fb2a3b4542a4 -r caeede0c9464 manageIndexMetaPURLs.py
--- a/manageIndexMetaPURLs.py	Wed Oct 31 21:54:55 2012 +0100
+++ b/manageIndexMetaPURLs.py	Thu Nov 01 15:52:14 2012 +0100
@@ -10,6 +10,8 @@
 ALREADY_EXISTS=0
 NEW_PURL=1
 ERROR=-1
+UPDATED=2
+
 
 PURL_PREFIX="MPIWG:"
 VALID=1
@@ -26,12 +28,14 @@
     # DB has fields:
     # purl purl
     # path url or path to indexMeta
+    # image_path dedicated path to the images
     # is_Index_meta bolean
     # created_by
     # created_at
     # last_change
     # validity 1 if it is valid, 0 if temporary invalid, -1 if permanetly invalid
     # server_url base_url of server
+
 
     def __init__(self):
         self.purlDB = web.database(dbn="postgres", db="purlDB",user="purlUSER",password="3333")
@@ -45,6 +49,15 @@
         else:
             return urls[0]['path']
 
+    def getImagePath(self,purl):
+        """Return the image_path stored for purl, or None if no such purl exists."""
+        # bind purl as a query parameter instead of pasting it into the SQL text
+        urls = self.purlDB.select('"purls"' ,where="purl=$purl",vars={'purl':purl})
+        if urls is None or len(urls)==0:
+            return None
+        else:
+            return urls[0]['image_path']
+
     def isIndexMeta(self,purl):
         urls = self.purlDB.select('"purls"' ,where="purl='%s'"%purl)
         if urls is None or len(urls)==0:
@@ -52,6 +65,16 @@
         else:
             return urls[0]['is_index_meta']
 
+    def getImagePathValidity(self,purl):
+        """Return (image_path, validity) for purl, or (None, -1) if no such purl exists."""
+        # purl can come straight from a request URL: bind it as a query parameter
+        urls = self.purlDB.select('"purls"' ,where="purl=$purl",vars={'purl':purl})
+        if urls is None or len(urls)==0:
+            return None,-1
+        else:
+            res = urls[0]
+            return res['image_path'],res['validity']
+
 
     def getPathValidity(self,purl):
-        urls = self.purlDB.select('"purls"' ,where="purl='%s'"%purl)
+        urls = self.purlDB.select('"purls"' ,where="purl=$purl",vars={'purl':purl})
@@ -136,7 +159,7 @@
 
 
     #generate purl and add it to the database
-    def createPurl(self,path,isIndexMeta,server_url="",user=""):
+    def createPurl(self,path,isIndexMeta,imagePath="",server_url="",user=""):
 
         purl = self.generatePurl()
 
@@ -145,25 +168,48 @@
-            purl = self.gneratePurl()
+            purl = self.generatePurl()
 
 
-        seq= self.purlDB.insert('purls',path=path,purl=purl,is_index_meta=isIndexMeta,
+        seq= self.purlDB.insert('purls',path=path,purl=purl,is_index_meta=isIndexMeta, image_path=imagePath,
                         server_url=server_url,validity=1,created_by=user,created_at=web.SQLLiteral("NOW()"))
 
         return purl
 
+    def updatePurl(self,purl,isIndexMeta,path="",imagePath="",server_url="",user=""):
+        """Overwrite the data stored for an existing purl.
+
+        All given values are written as they are and validity is set to 1.
+        Returns the result of the database update; register() treats a
+        value > 0 as success.
+        """
+        # NOTE(review): the field list in the class comment names a single
+        # last_change column - confirm that last_change_by/last_change_at exist
+        update= self.purlDB.update('purls',where="purl = $purl",vars={'purl':purl},
+                        path=path,is_index_meta=isIndexMeta, image_path=imagePath,
+                        server_url=server_url,validity=1,last_change_by=user,last_change_at=web.SQLLiteral("NOW()"))
+
+        return update
+
 
 
     #register a new path
-    def register(self,path,isIndexMeta,server_url="",user=""):
+    def register(self,path,isIndexMeta,imagePath="",server_url="",user="",update=False):
 
         #teste ob es zu dem Pfad schon eine Purl gibt
         purl = self.getPurl(path)
 
         if purl!=None:
+            # the path already has a purl: refresh its data if requested
+            if update:
+                up= self.updatePurl(purl, isIndexMeta, path, imagePath, server_url, user)
+                if up>0:
+                    return UPDATED,purl
+                else:
+                    return ERROR,None
+
             return ALREADY_EXISTS,purl
 
         #wenn nicht dann neue erzeugen
         else:
-            purl = self.createPurl(path,isIndexMeta,user=user,server_url=server_url)
+            purl = self.createPurl(path,isIndexMeta,imagePath=imagePath,user=user,server_url=server_url)
 
             if purl!=None:
                 return NEW_PURL,purl
diff -r fb2a3b4542a4 -r caeede0c9464 redirector.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/redirector.py	Thu Nov 01 15:52:14 2012 +0100
@@ -0,0 +1,108 @@
+'''
+redirects depending on a configuration file an index.meta purl to an viewer
+Created on 01.11.2012
+
+@author: dwinter
+'''
+
+import web
+import manageIndexMetaPURLs
+import logging
+
+class redirector:
+    """web.py handler: looks up a purl and redirects to the viewer of a flavour."""
+
+    viewers={} # maps viewer name --> [index.meta url format, image path url format]
+    purlHandler=None
+
+    def __init__(self):
+        self.purlHandler = manageIndexMetaPURLs.IndexMetaPURLManager()
+        #read config file for the viewers
+        self.viewers = self.readViewerConfig("viewer.config")
+
+    def readViewerConfig(self,fileName):
+        """Read the viewer configuration and return it as a dict.
+
+        Every line has the form "name,indexMetaFormat[,imagePathFormat]";
+        an empty or missing format is stored as None.
+        """
+        viewers={}
+        confFile= open(fileName)
+        try:
+            for line in confFile:
+                # strip the fields, otherwise the line end becomes part of the last url
+                splitted=[part.strip() for part in line.split(",")]
+                if len(splitted)<2:
+                    continue # blank or incomplete line
+
+                # url for index.meta viewer; None: this flavour does not handle index.meta
+                formats=[splitted[1] or None]
+
+                # url for image viewer; None if none is configured
+                if len(splitted)>2:
+                    formats.append(splitted[2] or None)
+                else:
+                    formats.append(None)
+
+                viewers[splitted[0]]=formats
+        finally:
+            confFile.close()
+
+        return viewers
+
+    def GET(self,path):
+        """Redirect "flavour/purl" to the viewer configured for flavour.
+
+        Raises web.notfound if the path is malformed, the flavour is
+        unknown or no path is stored for the purl.
+        """
+        splitted=path.split("/")
+        if len(splitted)!=2: # path must have two parts "flavour/purl"
+            raise web.notfound("not found")
+
+        flavour,purl = splitted
+
+        if flavour not in self.viewers:
+            raise web.notfound("no viewer for %s"%flavour)
+
+        viewerWithIndexMetaFormatString,viewerWithImagePathFormatString = self.viewers[flavour]
+
+        # prefer the image path if there is one
+        path,validity = self.purlHandler.getImagePathValidity(purl)
+        if path is not None and path!="":
+            return self.handlePath(path,validity,viewerWithImagePathFormatString)
+
+        path,validity = self.purlHandler.getPathValidity(purl)
+        if path is not None and path !="":
+            return self.handlePath(path,validity,viewerWithIndexMetaFormatString)
+
+        # neither an image path nor an index.meta path is known
+        raise web.notfound("not found")
+
+    def handlePath(self,path,validity,viewerFormatString):
+        """Redirect to the viewer url built from viewerFormatString and path.
+
+        Always raises: web.redirect on success, web.notfound for a missing
+        path or a non valid purl, web.internalerror if no viewer
+        format is configured.
+        """
+        if viewerFormatString is None or viewerFormatString=="":
+            raise web.internalerror("no viewer configure for indexMeta for this flavour")
+
+        if path is None:
+            raise web.notfound("Cannnot find a URL to this path")
+
+        # compare by value: identity comparison of numbers is not reliable
+        if validity == manageIndexMetaPURLs.PERM_NON_VALID:
+            raise web.notfound("PURL NON VALID ANYMORE!")
+
+        if validity == manageIndexMetaPURLs.TEMP_NON_VALID:
+            raise web.notfound("PURL currently not VALID try later!")
+
+        viewerUrl = viewerFormatString%path
+
+        print viewerUrl
+        raise web.redirect(viewerUrl,"302 found")
+
+if __name__ == '__main__':
+    pass
diff -r fb2a3b4542a4 -r caeede0c9464 restService.py
--- a/restService.py	Wed Oct 31 21:54:55 2012 +0100
+++ b/restService.py	Thu Nov 01 15:52:14 2012 +0100
@@ -5,9 +5,12 @@
 '''
 import web
 import manageIndexMetaPURLs
+from redirector import redirector
+import logging
 
 urls = (
-    '/purl/(.+)','purl'
+    '/purl/(.+)','purl',
+    '/docuviewer/(.+)','redirector',
 )
 
 app = web.application(urls, globals())
diff -r fb2a3b4542a4 -r caeede0c9464 viewer.config
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/viewer.config	Thu Nov 01 15:52:14 2012 +0100
@@ -0,0 +1,3 @@
+echo,http://echo.mpiwg-berlin.mpg.de/ECHOdocuViewfull?url=%s,http://echo.mpiwg-berlin.mpg.de/ECHOdocuViewfull?mode=imagepath&url=%s&viewMode=images
+libcoll,,http://libcoll.mpiwg-berlin.mpg.de/libview?url=%s&mode=imagepath
+digilib,,http://digilib.mpiwg-berlin.mpg.de/digitallibrary/jquery/digilib.html?fn=%s