changeset 3:caeede0c9464

update and redirector
author dwinter
date Thu, 01 Nov 2012 15:52:14 +0100
parents fb2a3b4542a4
children 107f13ca333b
files harvestToPurl.py manageIndexMetaPURLs.py redirector.py restService.py viewer.config
diffstat 5 files changed, 177 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/harvestToPurl.py	Wed Oct 31 21:54:55 2012 +0100
+++ b/harvestToPurl.py	Thu Nov 01 15:52:14 2012 +0100
@@ -12,8 +12,9 @@
 from os.path import join, getsize
 import sys
 import re
+from lxml import etree
 
-def harvestIndexMeta(path,user,delpath="",replacepath=""):
+def harvestIndexMeta(path,user,delpath="",replacepath="", update=False):
     
     md = manageIndexMetaPURLs.IndexMetaPURLManager()
     
@@ -22,15 +23,22 @@
     
         for name in files:
             if name.endswith(".meta"):
-                
                 fl=join(root, name)
+                  
+                imagePath=createImagePath(fl,root)
+                imagePath=re.sub("^"+delpath,replacepath,imagePath)
+              
                 fl=re.sub("^"+delpath,replacepath,fl) #loesche den teil vom path der mir delpath beginnt
                 
-                val,purl = md.register(fl, True, "", user)
+                val,purl = md.register(fl, True,  user=user,imagePath=imagePath,update=update)
                 if val==manageIndexMetaPURLs.ALREADY_EXISTS:
                     print "found %s -> %s"%(fl,purl)
+                
+                elif val==manageIndexMetaPURLs.UPDATED:
+                    print "updated %s -> %s"%(fl,purl)
                 else:
                     print "added %s -> %s"%(fl,purl)
+                    
         if 'pageimg' in dirs:
             dirs.remove('pageimg')  # don't visit pageimf
         for dir in dirs:
@@ -40,7 +48,29 @@
                 dirs.remove(dir)
 
 
+# builds an image path if the index.meta has no texttool tag
+def createImagePath(path,root):
+    """Return the image folder for the index.meta file at path, or "".
+
+    path -- XML file that is parsed with lxml
+    root -- directory in which the image folders are looked up
+    "" is returned if the document contains a texttool element or if
+    none of the known image folders exists below root.
+    """
+    document = etree.parse(path)
+    # a texttool tag means: no imagePath
+    if document.xpath('//texttool'):
+        return ""
+
+    # otherwise use a heuristic: the first known image folder that exists
+    for folderName in ("pageimg","pages"):
+        candidate = join(root, folderName)
+        if os.path.exists(candidate):
+            return candidate
+    return ""
+            
+    
+    
 
 if __name__ == '__main__':
     args = sys.argv[1:]
@@ -64,6 +94,6 @@
         print "ERROR: path %s does not exist!"%path
         sys.exit(2)
         
-    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath)
+    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath,update=True)
     
     
\ No newline at end of file
--- a/manageIndexMetaPURLs.py	Wed Oct 31 21:54:55 2012 +0100
+++ b/manageIndexMetaPURLs.py	Thu Nov 01 15:52:14 2012 +0100
@@ -10,6 +10,8 @@
 ALREADY_EXISTS=0
 NEW_PURL=1
 ERROR=-1
+UPDATED=2
+
 PURL_PREFIX="MPIWG:"
 
 VALID=1
@@ -26,12 +28,14 @@
     # DB has fields:
     # purl purl
     # path url or path to indexMeta
+    # image_path dedicated path to the images.
     # is_Index_meta bolean
     # created_by
     # created_at
     # last_change
     # validity 1 if it is valid, 0 if temporary invalid, -1 if permanetly invalid
     # server_url base_url of server
+    
   
     def __init__(self):
         self.purlDB = web.database(dbn="postgres", db="purlDB",user="purlUSER",password="3333")
@@ -45,6 +49,13 @@
         else:
             return urls[0]['path']
         
+    def getImagePath(self,purl):
+        """Return the image_path stored for purl, or None if no row matches purl."""
+        urls = self.purlDB.select('"purls"' ,where="purl=$purl",vars={"purl":purl}) # bound parameter, no string-built SQL
+        if urls is None or len(urls)==0:
+            return None
+        return urls[0]['image_path']
+        
     def isIndexMeta(self,purl):  
         urls = self.purlDB.select('"purls"' ,where="purl='%s'"%purl)
         if urls is None or len(urls)==0:
@@ -52,6 +63,14 @@
         else:
             return urls[0]['is_index_meta']
           
+    def getImagePathValidity(self,purl):
+        """Return (image_path, validity) for purl, or (None, -1) if no row matches."""
+        urls = self.purlDB.select('"purls"' ,where="purl=$purl",vars={"purl":purl}) # bound parameter, no string-built SQL
+        if urls is None or len(urls)==0:
+            return None,-1
+        res = urls[0] # fetch the row once and read both columns from it
+        return res['image_path'],res['validity']
+    
     
     def getPathValidity(self,purl):  
         urls = self.purlDB.select('"purls"' ,where="purl='%s'"%purl)
@@ -136,7 +155,7 @@
         
     
     #generate purl and add it to the database
-    def createPurl(self,path,isIndexMeta,server_url="",user=""):
+    def createPurl(self,path,isIndexMeta,imagePath="",server_url="",user=""):
         
         purl = self.generatePurl()
         
@@ -145,25 +164,43 @@
             purl = self.gneratePurl()
         
         
-        seq= self.purlDB.insert('purls',path=path,purl=purl,is_index_meta=isIndexMeta,
+        seq= self.purlDB.insert('purls',path=path,purl=purl,is_index_meta=isIndexMeta, image_path=imagePath,
                                 server_url=server_url,validity=1,created_by=user,created_at=web.SQLLiteral("NOW()"))
         
         
         return purl
         
     #register a new path
-    def register(self,path,isIndexMeta,server_url="",user=""):
+    
+    
+    def updatePurl(self,purl,isIndexMeta,path="",imagePath="",server_url="",user=""):
+        """Overwrite the stored data of the row with this purl and set validity to 1.
+
+        purl is bound via vars; returns whatever purlDB.update returns."""
+        return self.purlDB.update('purls',where="purl = $purl",vars={"purl":purl},path=path,is_index_meta=isIndexMeta,
+                                image_path=imagePath,server_url=server_url,validity=1,
+                                last_change_by=user,last_change_at=web.SQLLiteral("NOW()"))
+    
+    def register(self,path,isIndexMeta,imagePath="",server_url="",user="",update=False):
         
         
         #teste ob es zu dem Pfad schon eine Purl gibt
         purl = self.getPurl(path)
         if purl!=None:
+            
+            if update:
+                up= self.updatePurl(purl, isIndexMeta, path, imagePath, server_url, user)
+                if up>0:
+                    return UPDATED,purl
+                else:
+                    return ERROR,None
+                
             return ALREADY_EXISTS,purl
         
         
         #wenn nicht dann neue erzeugen
         else:
-            purl = self.createPurl(path,isIndexMeta,user=user,server_url=server_url)
+            purl = self.createPurl(path,isIndexMeta,imagePath=imagePath,user=user,server_url=server_url)
             if purl!=None:
                 return NEW_PURL,purl
         
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/redirector.py	Thu Nov 01 15:52:14 2012 +0100
@@ -0,0 +1,95 @@
+'''
+redirects depending on a configuration file an index.meta purl to an viewer
+Created on 01.11.2012
+
+@author: dwinter
+'''
+
+import web
+import manageIndexMetaPURLs
+import logging
+
+class redirector:
+    """web.py handler that redirects "flavour/purl" requests to a viewer URL.
+
+    viewer.config has one viewer per line: name,indexMetaUrl,imagePathUrl
+    Both urls are format strings that get the path inserted via %s; an empty
+    or missing url means that this kind of path is not supported.
+    """
+
+    viewers={} # viewer name --> [index.meta url format, image path url format]
+    purlHandler=None
+
+    def __init__(self):
+        """Read viewer.config and create the PURL manager."""
+        self.purlHandler = manageIndexMetaPURLs.IndexMetaPURLManager()
+
+        confFile = open("viewer.config")
+        try:
+            lines = confFile.readlines()
+        finally:
+            confFile.close() # the file handle was never closed before
+        for line in lines:
+            # strip() removes the line end, which otherwise stays in the last url
+            fields = [field.strip() for field in line.split(",")]
+            if fields[0]=="":
+                continue # skip blank lines
+            # pad missing columns so that lines with fewer than three fields work
+            fields += ["",""]
+            # None marks a url that is not configured (TODO: handle this)
+            self.viewers[fields[0]] = [fields[1] or None, fields[2] or None]
+
+    def GET(self,path):
+        """Redirect the request path "flavour/purl" to the viewer for flavour.
+
+        The image path of the purl is tried first, then the index.meta path.
+        Raises web.notfound for a malformed path, an unknown flavour or a
+        purl for which no path is found.
+        """
+        splitted = path.split("/")
+        if len(splitted)!=2: # path must have two parts "flavour/purl"
+            raise web.notfound("not found")
+        flavour,purl = splitted
+
+        if flavour not in self.viewers:
+            raise web.notfound("no viewer for %s"%flavour)
+        viewerWithIndexMetaFormatString,viewerWithImagePathFormatString = self.viewers[flavour]
+
+        # check whether there is an image path
+        imagePath,validity = self.purlHandler.getImagePathValidity(purl)
+        if imagePath:
+            return self.handlePath(imagePath,validity,viewerWithImagePathFormatString)
+
+        indexMetaPath,validity = self.purlHandler.getPathValidity(purl)
+        if indexMetaPath:
+            return self.handlePath(indexMetaPath,validity,viewerWithIndexMetaFormatString)
+
+        # the original fell through here and returned None instead of a 404
+        raise web.notfound("PURL not found")
+
+    def handlePath(self,path,validity,viewerFormatString):
+        """Redirect to viewerFormatString%path if the purl is valid.
+
+        Never returns normally: raises web.internalerror if viewerFormatString
+        is None or empty, web.notfound if path is None or validity is
+        PERM_NON_VALID or TEMP_NON_VALID, and web.redirect ("302 found") otherwise.
+        """
+        if viewerFormatString is None or viewerFormatString=="":
+            raise web.internalerror("no viewer configure for indexMeta for this flavour")
+
+        if path is None:
+            raise web.notfound("Cannnot find a URL to this path")
+
+        # compare by value; "is" only tests object identity
+        if validity == manageIndexMetaPURLs.PERM_NON_VALID:
+            raise web.notfound("PURL NON VALID ANYMORE!")
+        # was "return web.notfound(...)", i.e. the 404 was never raised
+        if validity == manageIndexMetaPURLs.TEMP_NON_VALID:
+            raise web.notfound("PURL currently not VALID try later!")
+
+        viewerUrl = viewerFormatString%path
+        logging.debug("redirecting to %s", viewerUrl) # was a debug print
+        raise web.redirect(viewerUrl,"302 found")
+        
+if __name__ == '__main__':
+    pass
\ No newline at end of file
--- a/restService.py	Wed Oct 31 21:54:55 2012 +0100
+++ b/restService.py	Thu Nov 01 15:52:14 2012 +0100
@@ -5,9 +5,12 @@
 '''
 import web
 import manageIndexMetaPURLs
+from redirector import redirector
+import logging
 
 urls = (
-    '/purl/(.+)','purl'
+    '/purl/(.+)','purl',
+    '/docuviewer/(.+)','redirector',
 )
 
 app = web.application(urls, globals())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/viewer.config	Thu Nov 01 15:52:14 2012 +0100
@@ -0,0 +1,3 @@
+echo,http://echo.mpiwg-berlin.mpg.de/ECHOdocuViewfull?url=%s,http://echo.mpiwg-berlin.mpg.de/ECHOdocuViewfull?mode=imagepath&url=%s&viewMode=images
+libcoll,,http://libcoll.mpiwg-berlin.mpg.de/libview?url=%s&mode=imagepath
+digilib,,http://digilib.mpiwg-berlin.mpg.de/digitallibrary/jquery/digilib.html?fn=%s