# view addDriToIndexMeta.py @ 7:78dd28ade713
#
# error handling
# author dwinter
# date Fri, 02 Nov 2012 09:25:11 +0100
# parents f0417a01690a
# children 733d43b30a82
# line wrap: on
# line source

'''
Created on 01.11.2012

@author: dwinter
'''
import os
import manageIndexMetaPURLs
import re
from lxml import etree

from os.path import join, getsize

# Log of files whose backup/rewrite step failed (one path per line).
# open() replaces the file() constructor, which exists only in Python 2.
errorFile = open("/tmp/addDRIErrors.txt","w")
# Log of files that could not be parsed or that lack a /resource/meta element.
parseErrorFile = open("/tmp/addDRIParseErrors.txt","w")

def addPURL(fl,purl,test=False):
    """Store *purl* in the ``dri[@type='mpiwg']`` element of an XML file.

    The file is parsed with lxml.  If ``/resource/meta`` already contains a
    ``dri`` element with ``type="mpiwg"``, its text is replaced by *purl*;
    otherwise a new such element is appended to the first ``/resource/meta``
    node.  The resulting document is always printed to stdout.

    Unless *test* is true, the original file is renamed to
    ``<fl>_mpiwg_dri`` and the modified document is written to *fl* as UTF-8
    without an XML declaration.

    :param fl: path of the file to update.
    :param purl: text to store in the dri element.
    :param test: if true, only print the result; nothing on disk is changed.
    :return: ``False`` if the file cannot be parsed or has no
        ``/resource/meta`` element (both cases are logged to
        ``parseErrorFile``), ``True`` otherwise.  A failure while renaming or
        writing is logged to ``errorFile`` but still yields ``True``.
    """
    try:
        tree = etree.parse(fl)
    except Exception:
        # Best effort: log the unreadable file and carry on.  Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit through.
        parseErrorFile.write("PARSE ERROR:"+fl+"\n")
        return False

    dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")
    if dris:
        # An mpiwg dri already exists: overwrite its value.
        dris[0].text = purl
    else:
        # No mpiwg dri yet: create a new one under the first meta element.
        metas = tree.xpath("/resource/meta")
        if not metas:
            parseErrorFile.write("no resource/meta: %s \n"%fl)
            return False
        newDri = etree.Element("dri",type="mpiwg")
        newDri.text = purl
        metas[0].append(newDri)

    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and is also valid as a function call.
    print(etree.tostring(tree, pretty_print=True))

    if not test:
        try:
            # Keep the untouched original next to the rewritten file.
            os.rename(fl, fl+"_mpiwg_dri")
            out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False)
            # "out" is already UTF-8 encoded, so write it in binary mode; the
            # with-block guarantees the file is flushed and closed (the
            # previous code referenced fo.close without calling it).
            with open(fl,"wb") as fo:
                fo.write(out)
        except Exception:
            errorFile.write(fl+"\n")
    return True
    
def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
    """Walk *path* and call ``addPURL`` for every file ending in ``.meta``.

    For each such file the leading *delpath* of its full path is replaced by
    *replacepath*; the result is passed to
    ``manageIndexMetaPURLs.IndexMetaPURLManager().getPurl`` and the returned
    value is handed to ``addPURL`` together with the file path.

    Directories named ``pageimg`` and directories whose name starts with a
    dot are not descended into.

    :param path: root directory of the walk.
    :param delpath: literal path prefix to strip from each file path before
        the PURL lookup (no regular-expression interpretation).
    :param replacepath: literal text put in place of *delpath*.
    :param test: passed through to ``addPURL``; if true no file is modified.
    """
    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".meta"):
                continue
            fl = join(root, name)
            # Plain prefix replacement: characters such as "." in delpath
            # must not act as regex wildcards.
            if fl.startswith(delpath):
                shortPath = replacepath + fl[len(delpath):]
            else:
                shortPath = fl
            # NOTE(review): a new IndexMetaPURLManager is created per file,
            # as before; confirm whether one instance could be reused.
            purl = manageIndexMetaPURLs.IndexMetaPURLManager().getPurl(shortPath)
            addPURL(fl,purl,test)

        # Prune in place so os.walk skips these subtrees.  Building a new
        # list avoids removing items from "dirs" while iterating over it,
        # which skipped the entry following each removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]

if __name__ == "__main__":
    # Script entry point: process everything below /mpiwg/online/, stripping
    # the "/mpiwg/online" prefix from each path before the PURL lookup, and
    # really modify the files (test=False).
    addDriToIndexMeta(
        "/mpiwg/online/",
        delpath="/mpiwg/online",
        test=False,
    )