# HG changeset patch
# User dwinter
# Date 1351691873 -3600
# Node ID 08a476cdcfea01ef2b8fc218fe787ebfc6c07ee9
first

diff -r 000000000000 -r 08a476cdcfea .project
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.project	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>purlService</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>
diff -r 000000000000 -r 08a476cdcfea .pydevproject
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.pydevproject	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?><pydev_project>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/purlService</path>
+</pydev_pathproperty>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+</pydev_project>
diff -r 000000000000 -r 08a476cdcfea createPurlDB.sql
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/createPurlDB.sql	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,24 @@
+-- Table: purls
+
+-- DROP TABLE purls;
+
+CREATE TABLE purls
+(
+  purl text NOT NULL,
+  path text,
+  is_index_meta boolean NOT NULL,
+  created_by text,
+  created_at timestamp with time zone,
+  last_change_by text,
+  last_change_at text, -- NOTE(review): text, while created_at is a timestamp; confirm this is intended
+  validity integer NOT NULL,
+  server_url text,
+  CONSTRAINT purl PRIMARY KEY (purl )
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE purls
+  OWNER TO postgres;
+GRANT ALL ON TABLE purls TO postgres;
+GRANT ALL ON TABLE purls TO "purlUSER";
diff -r 000000000000 -r 08a476cdcfea harvestToPurl.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/harvestToPurl.py	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,70 @@
+'''
+Created on 31.10.2012
+
+@author: dwinter
+
+Walk a directory tree and register a PURL for every *.meta file found.
+'''
+
+import manageIndexMetaPURLs
+
+import os
+from os.path import join, getsize
+import sys
+import re
+
+
+def harvestIndexMeta(path, user, delpath="", replacepath=""):
+    """Register every ``*.meta`` file below ``path`` with the PURL manager.
+
+    path        -- root directory handed to os.walk
+    user        -- name passed to the manager as creator of new PURLs
+    delpath     -- literal prefix cut from each file path before registering
+    replacepath -- text put in place of the removed ``delpath`` prefix
+
+    Directories named "pageimg" and directories starting with "." are not
+    descended into.  One line is printed per file, telling whether its PURL
+    was found or newly added.
+    """
+    md = manageIndexMetaPURLs.IndexMetaPURLManager()
+
+    for root, dirs, files in os.walk(path):
+        # Slice assignment prunes the list in place so os.walk skips these
+        # subtrees; calling remove() while looping over the same list would
+        # skip the entry that follows each removed directory.
+        dirs[:] = [d for d in dirs
+                   if d != "pageimg" and not d.startswith(".")]
+
+        for name in files:
+            if not name.endswith(".meta"):
+                continue
+
+            fl = join(root, name)
+            # delpath is a literal prefix, not a regular expression, so it
+            # is compared as plain text (paths often contain "." or "+").
+            if fl.startswith(delpath):
+                fl = replacepath + fl[len(delpath):]
+
+            val, purl = md.register(fl, True, "", user)
+            if val == manageIndexMetaPURLs.ALREADY_EXISTS:
+                print("found %s -> %s" % (fl, purl))
+            else:
+                print("added %s -> %s" % (fl, purl))
+
+
+if __name__ == '__main__':
+    args = sys.argv[1:]
+    if len(args) not in (2, 3, 4):
+        print("USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted")
+        sys.exit(2)
+
+    path = args[0]
+    user = args[1]
+    delpath = args[2] if len(args) >= 3 else ""
+    replacepath = args[3] if len(args) == 4 else ""
+
+    if not os.path.exists(path):
+        print("ERROR: path %s does not exist!" % path)
+        sys.exit(2)
+
+    harvestIndexMeta(path, user, delpath=delpath, replacepath=replacepath)
diff -r 000000000000 -r 08a476cdcfea manageIndexMetaPURLs.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/manageIndexMetaPURLs.py	Wed Oct 31 14:57:53 2012 +0100
@@ -0,0 +1,108 @@
+'''
+Created on 31.10.2012
+
+@author: dwinter
+
+Create and look up PURLs for paths stored in the "purls" table.
+'''
+
+import web
+import random
+
+ALREADY_EXISTS=0
+NEW_PURL=1
+ERROR=-1
+PURL_PREFIX="MPIWG:"
+
+# Characters used for PURL ids: the digits plus the upper-case letters
+# without I, J, L and O.  A character's index is its numeric value (0-31).
+PURL_ALPHABET = "0123456789ABCDEFGHKMNPQRSTUVWXYZ"
+
+
+class IndexMetaPURLManager(object):
+    """Register paths or URLs in the "purls" table and resolve them.
+
+    Registered paths should start with /mpiwg/online/ or directly with
+    /permanent/ or /archive/.
+
+    Columns of the "purls" table:
+      purl           the PURL
+      path           URL or path to the index.meta
+      is_index_meta  boolean
+      created_by
+      created_at
+      last_change_by
+      last_change_at
+      validity       1 if valid, 0 if temporarily invalid,
+                     -1 if permanently invalid
+      server_url     base URL of the server
+    """
+
+    purlDB = None
+
+    def __init__(self):
+        self.purlDB = web.database(dbn="postgres", db="purlDB", user="purlUSER", password="XXXX")
+
+    def getPath(self, purl):
+        """Return the path stored for ``purl``, or None if there is none."""
+        # The value is bound through ``vars`` so web.py quotes it; pasting it
+        # into the WHERE clause with % breaks on quotes and allows injection.
+        urls = self.purlDB.select('"purls"', where="purl=$purl", vars={"purl": purl})
+        if urls is None or len(urls) == 0:
+            return None
+        return urls[0]['path']
+
+    def exists(self, purl):
+        """Return True if a path is stored for ``purl``."""
+        return self.getPath(purl) is not None
+
+    def getPurl(self, path):
+        """Return the PURL attached to ``path`` (a path or URL), or None."""
+        urls = self.purlDB.select('"purls"', where="path=$path", vars={"path": path})
+        if urls is None or len(urls) == 0:
+            return None
+        return urls[0]['purl']
+
+    def generatePurl(self):
+        """Return a random PURL: prefix, seven id characters, one check character.
+
+        The check character encodes the position-weighted sum (weights 1-7)
+        of the seven id values modulo 31.
+        """
+        random.seed()
+        values = [random.randint(0, 31) for _ in range(7)]
+        check = sum((i + 1) * v for i, v in enumerate(values)) % 31
+        return PURL_PREFIX + "".join(PURL_ALPHABET[v] for v in values + [check])
+
+    def createPurl(self, path, isIndexMeta, server_url="", user=""):
+        """Generate an unused PURL, store it for ``path`` and return it."""
+        purl = self.generatePurl()
+        # Draw again until the PURL is not taken yet.
+        while self.exists(purl):
+            purl = self.generatePurl()
+
+        self.purlDB.insert('purls', path=path, purl=purl, is_index_meta=isIndexMeta,
+                           server_url=server_url, validity=1, created_by=user,
+                           created_at=web.SQLLiteral("NOW()"))
+        return purl
+
+    def register(self, path, isIndexMeta, server_url="", user=""):
+        """Return ``(status, purl)`` for ``path``, creating a PURL if needed.
+
+        status is ALREADY_EXISTS when the path already has a PURL, NEW_PURL
+        when one was created, and ERROR (with purl None) when createPurl
+        returned None.
+        """
+        purl = self.getPurl(path)
+        if purl is not None:
+            return ALREADY_EXISTS, purl
+
+        purl = self.createPurl(path, isIndexMeta, user=user, server_url=server_url)
+        if purl is not None:
+            return NEW_PURL, purl
+        return ERROR, None
+
+
+if __name__ == '__main__':
+    im = IndexMetaPURLManager()
+    print(im.register("/tmp3/index.meta", True, "", "dwinter"))