# HG changeset patch
# User dwinter
# Date 1351691873 -3600
# Node ID 08a476cdcfea01ef2b8fc218fe787ebfc6c07ee9
first
diff -r 000000000000 -r 08a476cdcfea .project
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.project Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,17 @@
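+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>purlService</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>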
diff -r 000000000000 -r 08a476cdcfea .pydevproject
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.pydevproject Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,8 @@
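+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/purlService</path>
+</pydev_pathproperty>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+</pydev_project>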
diff -r 000000000000 -r 08a476cdcfea createPurlDB.sql
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/createPurlDB.sql Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,24 @@
+-- Table: purls
+-- One row per registered purl and the path or URL it points to.
+-- DROP TABLE purls;
+
+CREATE TABLE purls
+(
+ purl text NOT NULL, -- the purl itself (primary key, see constraint below)
+ path text, -- URL or path to the index.meta
+ is_index_meta boolean NOT NULL,
+ created_by text,
+ created_at timestamp with time zone,
+ last_change_by text,
+ last_change_at text, -- NOTE(review): text, while created_at is a timestamp; confirm this is intended
+ validity integer NOT NULL, -- 1 = valid, 0 = temporarily invalid, -1 = permanently invalid
+ server_url text, -- base URL of the server
+ CONSTRAINT purl PRIMARY KEY (purl )
+)
+WITH (
+ OIDS=FALSE
+);
+ALTER TABLE purls
+ OWNER TO postgres;
+GRANT ALL ON TABLE purls TO postgres;
+GRANT ALL ON TABLE purls TO "purlUSER"; -- role the Python PURL manager connects as
diff -r 000000000000 -r 08a476cdcfea harvestToPurl.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/harvestToPurl.py Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,69 @@
+'''
+Created on 31.10.2012
+
+@author: dwinter
+'''
+
+import manageIndexMetaPURLs
+
+
+
+import os
+from os.path import join, getsize
+import sys
+import re
+
+def harvestIndexMeta(path,user,delpath="",replacepath=""):
+    """Walk *path* and register every ``*.meta`` file with the PURL manager.
+
+    path        -- root directory to scan
+    user        -- name passed to ``register`` as the creating user
+    delpath     -- literal prefix to strip from each file path before registering
+    replacepath -- text put in place of the stripped prefix
+
+    Directories named ``pageimg`` and directories starting with "." are skipped.
+    """
+    md = manageIndexMetaPURLs.IndexMetaPURLManager()
+
+    for root, dirs, files in os.walk(path):
+        for name in files:
+            if not name.endswith(".meta"):
+                continue
+            fl = join(root, name)
+            # Plain prefix swap: delpath is a literal path, not a regex.
+            if fl.startswith(delpath):
+                fl = replacepath + fl[len(delpath):]
+            val, purl = md.register(fl, True, "", user)
+            status = "found" if val == manageIndexMetaPURLs.ALREADY_EXISTS else "added"
+            print "%s %s -> %s" % (status, fl, purl)
+        # Prune in place (os.walk honours this) without mutating while iterating.
+        dirs[:] = [d for d in dirs if d != "pageimg" and not d.startswith(".")]
+
+
+
+
+if __name__ == '__main__':
+    # Command-line entry point:
+    #   harvestToPurl.py path user [pathPrefixToDelete [replacedeleted]]
+    cli = sys.argv[1:]
+    argc = len(cli)
+
+    # Two mandatory arguments, up to two optional ones.
+    if argc < 2 or argc > 4:
+        print "USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted"
+        sys.exit(2)
+
+    path, user = cli[0], cli[1]
+
+    # Optional arguments default to the empty string when absent.
+    delpath = cli[2] if argc >= 3 else ""
+    replacepath = cli[3] if argc == 4 else ""
+
+    # Refuse to walk a root that is not there.
+    if not os.path.exists(path):
+        print "ERROR: path %s does not exist!"%path
+        sys.exit(2)
+
+    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath)
+
+
\ No newline at end of file
diff -r 000000000000 -r 08a476cdcfea manageIndexMetaPURLs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/manageIndexMetaPURLs.py Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,159 @@
+'''
+Created on 31.10.2012
+
+@author: dwinter
+'''
+
+import web
+import random
+
+ALREADY_EXISTS=0
+NEW_PURL=1
+ERROR=-1
+PURL_PREFIX="MPIWG:"
+
+class IndexMetaPURLManager:
+    """Create and look up PURLs stored in the ``purls`` database table.
+
+    Paths to register should start with /mpiwg/online/, or directly with
+    /permanent/ or /archive/.
+
+    The ``purls`` table has the fields:
+        purl           the purl itself
+        path           url or path to the index.meta
+        is_index_meta  boolean
+        created_by
+        created_at
+        last_change_by, last_change_at
+        validity       1 if valid, 0 if temporarily invalid,
+                       -1 if permanently invalid
+        server_url     base url of the server
+    """
+
+    # Database handle; set per instance in __init__.
+    purlDB=None
+
+    # Characters that encode the values 0..31 of a generated purl, in order.
+    # The letters I, J, L and O are not part of the alphabet.
+    _ALPHABET = "0123456789ABCDEFGHKMNPQRSTUVWXYZ"
+
+    # Number of random characters in a purl; one check character follows them.
+    _RANDOM_LENGTH = 7
+
+    def __init__(self):
+        """Open the database connection.
+
+        Connects to the PostgreSQL database ``purlDB`` with fixed credentials.
+        """
+        self.purlDB = web.database(dbn="postgres", db="purlDB",user="purlUSER",password="XXXX")
+
+    def getPath(self,purl):
+        """Return the path stored for a purl.
+
+        purl -- the purl to look up
+
+        Returns the value of the ``path`` column of the first matching row,
+        or None if no row matches.
+        """
+        # Bind the value as a query parameter instead of formatting it into
+        # the SQL text, so a quote in the value cannot break or alter the query.
+        urls = self.purlDB.select('"purls"', where="purl=$purl", vars={"purl": purl})
+        if urls is None or len(urls)==0:
+            return None
+        return urls[0]['path']
+
+    def exists(self,purl):
+        """Check whether a purl exists.
+
+        purl -- the purl to check
+
+        Returns True if getPath yields a path for it, False otherwise.
+        """
+        return self.getPath(purl) is not None
+
+    def getPurl(self,path):
+        """Return the purl attached to a path or URL.
+
+        path -- the path or URL to look up
+
+        Returns the value of the ``purl`` column of the first matching row,
+        or None if no row matches.
+        """
+        # Bound as a parameter: file paths may legitimately contain a single
+        # quote, which would end the SQL string literal if formatted in.
+        urls = self.purlDB.select('"purls"', where="path=$path", vars={"path": path})
+        if urls is None or len(urls)==0:
+            return None
+        return urls[0]['purl']
+
+    def generatePurl(self):
+        """Return a new random purl.
+
+        The purl is PURL_PREFIX followed by seven random characters and one
+        check character, all taken from _ALPHABET. It is not checked
+        against the database here; createPurl does that.
+        """
+        random.seed()
+        x = [random.randint(0, 31) for _ in range(self._RANDOM_LENGTH)]
+
+        # Check value: position-weighted sum (weights 1..7) modulo 31, so the
+        # last alphabet character is never used as the check character.
+        sm = sum((i + 1) * v for i, v in enumerate(x))
+        c = sm % 31
+
+        nid = "".join([self._ALPHABET[v] for v in x]) + self._ALPHABET[c]
+        return PURL_PREFIX+nid
+
+    def createPurl(self,path,isIndexMeta,server_url="",user=""):
+        """Generate an unused purl, insert it into the database and return it.
+
+        path        -- path or URL the purl points to
+        isIndexMeta -- stored in the ``is_index_meta`` column
+        server_url  -- stored in the ``server_url`` column
+        user        -- stored in the ``created_by`` column
+
+        The new row gets validity 1 and ``created_at`` set to the database's
+        NOW().
+        """
+        purl = self.generatePurl()
+
+        # Draw again until the purl is not yet in use.
+        while self.exists(purl):
+            purl = self.generatePurl()
+
+        self.purlDB.insert('purls',path=path,purl=purl,is_index_meta=isIndexMeta,
+                           server_url=server_url,validity=1,created_by=user,
+                           created_at=web.SQLLiteral("NOW()"))
+        return purl
+
+    def register(self,path,isIndexMeta,server_url="",user=""):
+        """Register a path and return a ``(status, purl)`` tuple.
+
+        path        -- path or URL to register
+        isIndexMeta -- passed on to createPurl
+        server_url  -- passed on to createPurl
+        user        -- passed on to createPurl
+
+        status is ALREADY_EXISTS when a purl for the path is already stored,
+        NEW_PURL when one was created, and ERROR (with purl None) when
+        createPurl returned None.
+        """
+        # Reuse the purl if the path has been registered before.
+        purl = self.getPurl(path)
+        if purl is not None:
+            return ALREADY_EXISTS,purl
+
+        # Otherwise create a new one.
+        purl = self.createPurl(path,isIndexMeta,user=user,server_url=server_url)
+        if purl is not None:
+            return NEW_PURL,purl
+        return ERROR,None
+
+
+
+
+if __name__ == '__main__':
+    # Manual smoke test: register a sample index.meta path and print the
+    # (status, purl) tuple returned by the manager.
+    manager = IndexMetaPURLManager()
+    print manager.register("/tmp3/index.meta", True, "", "dwinter")
\ No newline at end of file