view addDriToIndexMeta.py @ 35:d3ecbfd21e06

Merge with a25bfc49a068371c555ac034df1a24e349850163
author dwinter
date Wed, 23 Oct 2013 12:28:22 +0200
parents 0190f49bce88
children be8640c08d99
line wrap: on
line source

'''
Created on 01.11.2012

@author: dwinter
'''
import os
import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs 
import re
from lxml import etree
import sys

from os.path import join, getsize

# Log files shared by the functions below; each run truncates them.
# open() is used instead of the Python-2-only file() constructor.
errorFile = open("/tmp/addDRIErrors.txt","w")  # files whose rewrite failed
parseErrorFile = open("/tmp/addDRIParseErrors.txt","w")  # unparsable files / files without resource/meta
alreadyExistsFile = open("/tmp/addDRIalreadyExists.txt","w")  # files that already had an mpiwg dri


def correctAuthor(tree):
    """Join multi-line author entries into a single line.

    Every /resource/meta/bib/author element whose text contains line
    feeds gets each line feed replaced by "; ". Elements without text
    are skipped.

    Returns True if at least one element was modified, otherwise False.
    """
    modified = False

    for node in tree.xpath("/resource/meta/bib/author"):
        original = node.text
        if original is None:
            continue

        joined = original.replace("\n", "; ")
        if joined != original:
            node.text = joined
            modified = True

    return modified
    
def addPURL(fl,purl,test=False):
    """Add or update the MPIWG DRI element of one index.meta file.

    The file is parsed, correctAuthor() is applied to the tree, and a
    <dri type="mpiwg"> element carrying *purl* is appended to the first
    /resource/meta element. If such a dri element already exists, the
    text of the first one is replaced instead and the file is recorded
    in alreadyExistsFile.

    Unless *test* is true, the original file is renamed to
    fl + "_mpiwg_dri" and the modified tree is written to *fl*.

    Args:
        fl: path of the index.meta file.
        purl: text to store in the dri element.
        test: if true, only print the resulting XML; the file on disk
            is left untouched.

    Returns:
        False if the file could not be parsed or has no /resource/meta
        element. True otherwise, including when writing failed; such
        files are recorded in errorFile.
    """
    try:
        tree = etree.parse(fl)
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must still abort the run.
        parseErrorFile.write("PARSE ERROR:"+fl+"\n")
        return False

    dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")

    changed = correctAuthor(tree)

    if len(dris)==0:  # no dri yet: create a new one
        metas = tree.xpath("/resource/meta")
        if len(metas)==0:
            parseErrorFile.write("no resource/meta: %s \n"%fl)
            return False
        newDri = etree.Element("dri",type="mpiwg")
        newDri.text = purl
        metas[0].append(newDri)
    else:
        dris[0].text = purl
        alreadyExistsFile.write("%s \n"%fl)
        # NOTE(review): an existing dri whose text differs from purl is
        # only persisted when the author fields were changed as well;
        # otherwise the in-memory update above is discarded here.
        if not changed:  # nothing has to be done
            return True

    # Single-argument print() behaves the same on Python 2 and 3.
    print(etree.tostring(tree, pretty_print=True))

    if not test:
        try:
            # Serialise before renaming so that a serialisation failure
            # cannot leave the original moved away with nothing in its place.
            out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False)
            os.rename(fl, fl+"_mpiwg_dri")
            # "wb": the serialised tree is an encoded byte string. The
            # with-block guarantees the file is closed (the previous code
            # referenced fo.close without calling it).
            with open(fl,"wb") as fo:
                fo.write(out)
        except Exception:
            print(sys.exc_info()[0])
            print(sys.exc_info()[1])
            errorFile.write(fl+"\n")
    return True
    
def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
    """Walk *path* and add a DRI to every index.meta file found.

    For each file named "index.meta" below *path*, a leading *delpath*
    in its full path is replaced by *replacepath*. The result is passed
    to IndexMetaPURLManager.getPurl() and the returned value is handed
    to addPURL() together with the file path.

    Directories named "pageimg" and directories whose name starts with
    "." are not descended into.

    Args:
        path: root directory of the walk.
        delpath: literal path prefix to strip before the PURL lookup.
        replacepath: text put in place of the stripped prefix.
        test: passed through to addPURL().
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):
        for name in files:
            if name != "index.meta":
                continue

            fl = join(root, name)
            # Plain prefix replacement: delpath is a path, not a regular
            # expression, so regex metacharacters in it must not be
            # interpreted.
            if fl.startswith(delpath):
                shortPath = replacepath + fl[len(delpath):]
            else:
                shortPath = fl
            purl = md.getPurl(shortPath)

            addPURL(fl,purl,test)

        # Prune in place so os.walk skips these directories. Building a
        # new list avoids removing entries from dirs while iterating it,
        # which could skip the entry following a removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]

if __name__ == "__main__":
    # Script entry point: process everything below the VLP directory,
    # stripping the "/mpiwg/online" prefix before the PURL lookup.
    addDriToIndexMeta(
        "/mpiwg/online/permanent/vlp",
        delpath="/mpiwg/online",
        test=False,
    )