changeset 0:08a476cdcfea

first
author dwinter
date Wed, 31 Oct 2012 14:57:53 +0100
parents
children fef9ad4020ee
files .project .pydevproject createPurlDB.sql harvestToPurl.py manageIndexMetaPURLs.py
diffstat 5 files changed, 277 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.project	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>purlService</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.pydevproject	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/purlService</path>
+</pydev_pathproperty>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+</pydev_project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/createPurlDB.sql	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,24 @@
+-- Table: purls
+-- One row per purl; the purl column is the primary key.
+-- DROP TABLE purls;
+
+CREATE TABLE purls
+(
+  purl text NOT NULL, -- the purl itself
+  path text, -- url or path to the index.meta
+  is_index_meta boolean NOT NULL,
+  created_by text,
+  created_at timestamp with time zone,
+  last_change_by text,
+  last_change_at text, -- NOTE(review): text, while created_at is a timestamp -- confirm this is intended
+  validity integer NOT NULL, -- 1 if valid, 0 if temporarily invalid, -1 if permanently invalid
+  server_url text, -- base url of the server
+  CONSTRAINT purl PRIMARY KEY (purl )
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE purls
+  OWNER TO postgres;
+GRANT ALL ON TABLE purls TO postgres;
+GRANT ALL ON TABLE purls TO "purlUSER";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/harvestToPurl.py	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,69 @@
+'''
+Created on 31.10.2012
+
+@author: dwinter
+'''
+
+import manageIndexMetaPURLs
+
+
+
+import os
+from os.path import join, getsize
+import sys
+import re
+
+def harvestIndexMeta(path,user,delpath="",replacepath=""):
+    '''Walk path and register a PURL for every *.meta file found below it.
+
+    path        -- root directory to scan
+    user        -- passed to register() as the creating user
+    delpath     -- literal prefix cut off each file path before registering
+    replacepath -- text put in place of the removed prefix
+    Directories named "pageimg" or starting with "." are not visited.
+    '''
+    md = manageIndexMetaPURLs.IndexMetaPURLManager()
+    for root, dirs, files in os.walk(path):
+        for name in files:
+            if not name.endswith(".meta"):
+                continue
+            fl = join(root, name)
+            # delpath is a plain prefix, so compare it literally (no regex)
+            if fl.startswith(delpath):
+                fl = replacepath + fl[len(delpath):]
+            val,purl = md.register(fl, True, "", user)
+            if val==manageIndexMetaPURLs.ALREADY_EXISTS:
+                print "found %s -> %s"%(fl,purl)
+            else:
+                print "added %s -> %s"%(fl,purl)
+        # prune in place; removing while looping over dirs skipped entries
+        dirs[:] = [d for d in dirs if d != "pageimg" and not d.startswith(".")]
+
+
+
+
+if __name__ == '__main__':
+    # command line: path user [pathPrefixToDelete [replacement]]
+    args = sys.argv[1:]
+    argc = len(args)
+
+    # only two, three or four arguments are accepted
+    if argc not in (2, 3, 4):
+        print "USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted"
+        sys.exit(2)
+
+    path, user = args[0], args[1]
+
+    # the optional arguments default to the empty string
+    delpath = args[2] if argc >= 3 else ""
+    replacepath = args[3] if argc == 4 else ""
+
+    # stop early when the start directory is missing
+    if not os.path.exists(path):
+        print "ERROR: path %s does not exist!"%path
+        sys.exit(2)
+
+    # walk the tree and register every .meta file found
+    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath)
+    
+    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/manageIndexMetaPURLs.py	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,159 @@
+'''
+Created on 31.10.2012
+
+@author: dwinter
+'''
+
+import web
+import random
+
+ALREADY_EXISTS=0
+NEW_PURL=1
+ERROR=-1
+PURL_PREFIX="MPIWG:"
+    
+class IndexMetaPURLManager:
+    '''Create and look up PURLs stored in the "purls" database table.
+
+    Registered paths should start with /mpiwg/online/, or directly with
+    /permanent/ or /archive/.
+
+    The table has the fields:
+      purl           the purl
+      path           url or path to the index.meta
+      is_index_meta  boolean
+      created_by
+      created_at
+      last_change_by
+      last_change_at
+      validity       1 if valid, 0 if temporarily invalid, -1 if permanently invalid
+      server_url     base url of the server
+    '''
+
+    # database handle; replaced by the real connection in __init__
+    purlDB=None
+
+    def __init__(self):
+        '''Open the connection to the purl database.'''
+        # NOTE(review): the connection settings are hard-coded here
+        self.purlDB = web.database(dbn="postgres", db="purlDB",user="purlUSER",password="XXXX")
+
+    def getPath(self,purl):
+        '''Return the path stored for purl, or None if purl is unknown.'''
+        # bind the value as a query parameter instead of pasting it into the SQL
+        urls = self.purlDB.select('"purls"',where="purl=$purl",vars={'purl':purl})
+        if urls is None or len(urls)==0:
+            return None
+        return urls[0]['path']
+
+    def exists(self,purl):
+        '''Return True if a path is stored for purl.'''
+        # relies on getPath, so a purl without a stored path counts as missing
+        return self.getPath(purl) is not None
+
+    def getPurl(self,path):
+        '''Return the purl attached to path (a path or URL), or None if there is none.'''
+        # parameter binding keeps paths containing quotes from breaking the query
+        urls = self.purlDB.select('"purls"',where="path=$path",vars={'path':path})
+        if urls is None or len(urls)==0:
+            return None
+        return urls[0]['purl']
+
+    def generatePurl(self):
+        '''Return a random purl: PURL_PREFIX, seven symbols and one check symbol.
+
+        The result is not checked against the database.
+        '''
+        # symbol used for each value 0..31
+        driEncode={ 0:'0',
+                    1:'1',
+                    2:'2',
+                    3:'3',
+                    4:'4',
+                    5:'5',
+                    6:'6',
+                    7:'7',
+                    8:'8',
+                    9:'9',
+                    10:'A',
+                    11:'B',
+                    12:'C',
+                    13:'D',
+                    14:'E',
+                    15:'F',
+                    16:'G',
+                    17:'H',
+                    18:'K',
+                    19:'M',
+                    20:'N',
+                    21:'P',
+                    22:'Q',
+                    23:'R',
+                    24:'S',
+                    25:'T',
+                    26:'U',
+                    27:'V',
+                    28:'W',
+                    29:'X',
+                    30:'Y',
+                    31:'Z'
+                    }
+
+        random.seed()
+        x = [random.randint(0,31) for i in range(7)]
+
+        # check value: position-weighted sum of the seven values, modulo 31
+        sm = sum((i+1)*v for i,v in enumerate(x))
+        c = sm % 31
+
+        nid = "".join([driEncode[v] for v in x])
+        nid += driEncode[c]
+        return PURL_PREFIX+nid
+
+    def createPurl(self,path,isIndexMeta,server_url="",user=""):
+        '''Generate an unused purl for path, store it in the database and return it.
+
+        path        -- path or url the purl points to
+        isIndexMeta -- stored in the is_index_meta column
+        server_url  -- stored in the server_url column
+        user        -- stored in the created_by column
+
+        The row is inserted with validity 1 and created_at set to NOW().
+        '''
+        purl = self.generatePurl()
+
+        # draw again until the purl is not yet in use
+        while self.exists(purl):
+            purl = self.generatePurl()
+
+        self.purlDB.insert('purls',path=path,purl=purl,is_index_meta=isIndexMeta,
+                           server_url=server_url,validity=1,created_by=user,created_at=web.SQLLiteral("NOW()"))
+        return purl
+
+    def register(self,path,isIndexMeta,server_url="",user=""):
+        '''Return (status, purl) for path, creating a new purl if none exists yet.
+
+        status is ALREADY_EXISTS if path already had a purl, NEW_PURL if one was
+        created, or ERROR (with purl None) if createPurl returned None.
+
+        isIndexMeta, server_url and user are passed on to createPurl.
+        '''
+        # check whether the path already has a purl
+        purl = self.getPurl(path)
+        if purl is not None:
+            return ALREADY_EXISTS,purl
+
+        # if not, create a new one
+        purl = self.createPurl(path,isIndexMeta,user=user,server_url=server_url)
+        if purl is not None:
+            return NEW_PURL,purl
+        return ERROR,None
+
+
+
+
+if __name__ == '__main__':
+    # manual smoke test against the configured database
+    manager = IndexMetaPURLManager()
+    result = manager.register("/tmp3/index.meta", True, "", "dwinter")
+    print result
\ No newline at end of file