Mercurial > hg > purlService
changeset 39:a33fa2377075
Output (error log) files are now passed in as parameters instead of being opened at module level
author | dwinter |
---|---|
date | Wed, 05 Mar 2014 10:11:50 +0100 |
parents | 6d0d7f1c11f2 |
children | 671dd1e4bd09 |
files | addDriToIndexMeta.py harvestToPurl.py |
diffstat | 2 files changed, 23 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/addDriToIndexMeta.py Tue Mar 04 14:16:55 2014 +0100 +++ b/addDriToIndexMeta.py Wed Mar 05 10:11:50 2014 +0100 @@ -11,9 +11,6 @@ from os.path import join, getsize -errorFile = file("/tmp/addDRIErrors.txt","w") -parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") -alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w") def correctAuthor(tree): @@ -36,7 +33,11 @@ return changed -def getDRIfromIndexMeta(fl): +def getDRIfromIndexMeta(fl,parseErrorFile=None): + + + if parseErrorFile is None: + parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") try: tree = etree.parse(fl) except: @@ -51,11 +52,11 @@ return dris[0].text -def addPURL(fl,purl,test=False): +def addPURL(fl,purl,efiles,test=False): try: tree = etree.parse(fl) except: - parseErrorFile.write("PARSE ERROR:"+fl+"\n") + efiles.parseErrorFile.write("PARSE ERROR:"+fl+"\n") return False dris = tree.xpath("/resource/meta/dri[@type='mpiwg']") @@ -70,13 +71,13 @@ newDri.text=purl metas=tree.xpath("/resource/meta") if len(metas)==0: - parseErrorFile.write("no resource/meta: %s \n"%fl) + efiles.parseErrorFile.write("no resource/meta: %s \n"%fl) return False else: metas[0].append(newDri) else: dris[0].text=purl - alreadyExistsFile.write("%s \n"%fl) + efiles.alreadyExistsFile.write("%s \n"%fl) if not changed: #nothing has to be done return True @@ -95,10 +96,10 @@ print sys.exc_info()[0] print sys.exc_info()[1] - errorFile.write(fl+"\n") + efiles.errorFile.write(fl+"\n") return True -def addDriToIndexMeta(path,delpath="",replacepath="",test=False): +def addDriToIndexMeta(path,efiles,delpath="",replacepath="",test=False): md=manageIndexMetaPURLs.IndexMetaPURLManager() @@ -111,7 +112,7 @@ shortPath=re.sub("^"+delpath,replacepath,fl) purl=md.getPurl(shortPath) - addPURL(fl,purl,test) + addPURL(fl,purl,test,efiles) if 'pageimg' in dirs: dirs.remove('pageimg') # don't visit pageimf @@ -122,4 +123,11 @@ dirs.remove(dir) if __name__ == '__main__': - 
addDriToIndexMeta("/mpiwg/online/permanent/vlp",delpath="/mpiwg/online",test=False) + class ef: + errorFile = file("/tmp/addDRIErrors.txt","w") + parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") + alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w") + + efiles = ef() + + addDriToIndexMeta("/mpiwg/online/permanent/vlp",efiles,delpath="/mpiwg/online",test=False)
--- a/harvestToPurl.py Tue Mar 04 14:16:55 2014 +0100 +++ b/harvestToPurl.py Wed Mar 05 10:11:50 2014 +0100 @@ -11,7 +11,7 @@ import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs -import addDriToIndexMeta +from addDriToIndexMeta import getDRIfromIndexMeta import os @@ -45,8 +45,8 @@ if name.endswith(".meta"): fl=join(root, name) - - driIndexMeta=addDriToIndexMeta.getDRIfromIndexMeta(fl) + parseErrorFile = file("/tmp/HarvestToPurlParseErrors.txt","w") + driIndexMeta=getDRIfromIndexMeta(fl,parseErrorFile=parseErrorFile) imagePath=createImagePath(fl,root)