changeset 39:a33fa2377075

Output (log) files are now passed in as parameters instead of being opened as module-level globals
author dwinter
date Wed, 05 Mar 2014 10:11:50 +0100
parents 6d0d7f1c11f2
children 671dd1e4bd09
files addDriToIndexMeta.py harvestToPurl.py
diffstat 2 files changed, 23 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/addDriToIndexMeta.py	Tue Mar 04 14:16:55 2014 +0100
+++ b/addDriToIndexMeta.py	Wed Mar 05 10:11:50 2014 +0100
@@ -11,9 +11,6 @@
 
 from os.path import join, getsize
 
-errorFile = file("/tmp/addDRIErrors.txt","w")
-parseErrorFile = file("/tmp/addDRIParseErrors.txt","w")
-alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w")
 
 
 def correctAuthor(tree):
@@ -36,7 +33,11 @@
     return changed
 
 
-def getDRIfromIndexMeta(fl):
+def getDRIfromIndexMeta(fl,parseErrorFile=None):
+    
+    
+    if parseErrorFile is None:
+          parseErrorFile = file("/tmp/addDRIParseErrors.txt","w")
     try:
         tree = etree.parse(fl)
     except:
@@ -51,11 +52,11 @@
         return dris[0].text
 
 
-def addPURL(fl,purl,test=False):
+def addPURL(fl,purl,efiles,test=False):
     try:
         tree = etree.parse(fl)
     except:
-        parseErrorFile.write("PARSE ERROR:"+fl+"\n")
+        efiles.parseErrorFile.write("PARSE ERROR:"+fl+"\n")
         return False
     
     dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")
@@ -70,13 +71,13 @@
         newDri.text=purl
         metas=tree.xpath("/resource/meta")
         if len(metas)==0:
-            parseErrorFile.write("no resource/meta: %s \n"%fl)
+            efiles.parseErrorFile.write("no resource/meta: %s \n"%fl)
             return False
         else:
             metas[0].append(newDri)
     else:
         dris[0].text=purl
-        alreadyExistsFile.write("%s \n"%fl)
+        efiles.alreadyExistsFile.write("%s \n"%fl)
         if not changed: #nothing has to be done
             return True
 
@@ -95,10 +96,10 @@
             
             print sys.exc_info()[0]
             print sys.exc_info()[1]
-            errorFile.write(fl+"\n")
+            efiles.errorFile.write(fl+"\n")
     return True
     
-def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
+def addDriToIndexMeta(path,efiles,delpath="",replacepath="",test=False):
     
     md=manageIndexMetaPURLs.IndexMetaPURLManager()
     
@@ -111,7 +112,7 @@
                 shortPath=re.sub("^"+delpath,replacepath,fl)
                 purl=md.getPurl(shortPath)
               
-                addPURL(fl,purl,test)
+                addPURL(fl,purl,test,efiles)
                     
         if 'pageimg' in dirs:
             dirs.remove('pageimg')  # don't visit pageimf
@@ -122,4 +123,11 @@
                 dirs.remove(dir)
 
 if __name__ == '__main__':
-     addDriToIndexMeta("/mpiwg/online/permanent/vlp",delpath="/mpiwg/online",test=False)
+    class ef:
+        errorFile = file("/tmp/addDRIErrors.txt","w")
+        parseErrorFile = file("/tmp/addDRIParseErrors.txt","w")
+        alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w")
+        
+    efiles = ef()
+
+    addDriToIndexMeta("/mpiwg/online/permanent/vlp",efiles,delpath="/mpiwg/online",test=False)
--- a/harvestToPurl.py	Tue Mar 04 14:16:55 2014 +0100
+++ b/harvestToPurl.py	Wed Mar 05 10:11:50 2014 +0100
@@ -11,7 +11,7 @@
 
 
 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs
-import addDriToIndexMeta
+from addDriToIndexMeta import getDRIfromIndexMeta
 
 
 import os
@@ -45,8 +45,8 @@
             if name.endswith(".meta"):
                 fl=join(root, name)
                 
-                
-                driIndexMeta=addDriToIndexMeta.getDRIfromIndexMeta(fl)
+                parseErrorFile = file("/tmp/HarvestToPurlParseErrors.txt","w")
+                driIndexMeta=getDRIfromIndexMeta(fl,parseErrorFile=parseErrorFile)
                 
                 imagePath=createImagePath(fl,root)