view addDriToIndexMeta.py @ 6:f0417a01690a

error handling if file renaming not possible
author dwinter
date Fri, 02 Nov 2012 09:06:44 +0100
parents 3ebe37d81071
children 78dd28ade713
line wrap: on
line source

'''
Created on 01.11.2012

@author: dwinter
'''
import os
import manageIndexMetaPURLs
import re
from lxml import etree

from os.path import join, getsize

# Log of files that addPURL could not rewrite, one path per line. The file is
# opened (and truncated) as soon as this module is imported. open() replaces
# the file() builtin, which only exists in Python 2.
errorFile = open("/tmp/addDRIErrors.txt","w")

def addPURL(fl,purl,test=False):
    '''
    Store purl as the MPIWG DRI of an index.meta file.

    The file is parsed with lxml. If a /resource/meta/dri[@type='mpiwg']
    element exists, the text of the first one is replaced by purl; otherwise
    a new dri element is appended to the first /resource/meta element. The
    resulting tree is printed to stdout.

    Unless test is true, the original file is renamed to fl+"_mpiwg_dri" and
    the modified XML is written to fl. If that fails, fl is appended to the
    module-level errorFile and the function still returns True.

    @param fl: path of the index.meta file
    @param purl: text to put into the dri element
    @param test: if true, only print the result and leave the file untouched
    @return: False if the file has no /resource/meta element, True otherwise
    '''
    tree = etree.parse(fl)

    dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")

    if dris:
        dris[0].text = purl
    else:  # no dri yet: create a new one
        metas = tree.xpath("/resource/meta")
        if not metas:
            print("no resource/meta in %s" % fl)
            return False
        newDri = etree.Element("dri", type="mpiwg")
        newDri.text = purl
        metas[0].append(newDri)

    print(etree.tostring(tree, pretty_print=True))

    if not test:
        try:
            # Serialise before touching the file system, so a failure here
            # cannot leave the original renamed without a replacement.
            out = etree.tostring(tree, encoding="UTF-8", xml_declaration=False)
            os.rename(fl, fl + "_mpiwg_dri")
            # out is an encoded byte string, hence binary mode; the with
            # block guarantees the handle is closed (the previous code
            # referenced fo.close without calling it).
            with open(fl, "wb") as fo:
                fo.write(out)
        except Exception:
            # Best effort: record the file and carry on with the next one.
            # Unlike a bare except this lets KeyboardInterrupt and
            # SystemExit propagate.
            errorFile.write(fl + "\n")
            errorFile.flush()
    return True
    
def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
    '''
    Walk path and add a DRI to every file whose name ends with ".meta".

    For each such file, a leading delpath in the full file name is replaced
    by replacepath, the result is passed to
    manageIndexMetaPURLs.IndexMetaPURLManager().getPurl(), and the returned
    value is handed to addPURL together with the file name.

    Directories named "pageimg" and directories whose name starts with "."
    are not descended into.

    @param path: root directory of the walk
    @param delpath: literal prefix to strip from each file path
    @param replacepath: replacement for the stripped prefix
    @param test: passed through to addPURL
    '''
    # delpath is a file-system path, not a pattern: escape it so characters
    # such as "." or "+" are matched literally.
    prefixPattern = "^" + re.escape(delpath)

    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".meta"):
                continue
            fl = join(root, name)
            shortPath = re.sub(prefixPattern, replacepath, fl)
            # NOTE(review): a new manager is created per file, as before;
            # whether one instance could be reused is not visible from here.
            purl = manageIndexMetaPURLs.IndexMetaPURLManager().getPurl(shortPath)

            addPURL(fl, purl, test)

        # Prune in place so os.walk skips these directories. Rebuilding the
        # list avoids removing entries while iterating over it, which made
        # the loop skip the entry following each removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]

if __name__ == '__main__':
    # Run as a script: process the "annalen" tree. "/mpiwg/online" is
    # stripped from the front of each file path before the PURL lookup.
    addDriToIndexMeta(
        "/mpiwg/online/permanent/annalen",
        delpath="/mpiwg/online",
    )