Mercurial > hg > purlService
view addDriToIndexMeta.py @ 19:cce127a28fc9
added getpurls
author | dwinter |
---|---|
date | Wed, 21 Nov 2012 15:39:08 +0100 |
parents | fad73212354b |
children | f748e2b684c9 |
line wrap: on
line source
"""Add MPIWG DRI entries (PURLs) to index.meta files.

Created on 01.11.2012

@author: dwinter

Walks a directory tree, asks the IndexMetaPURLManager for the PURL of every
``index.meta`` file found and stores that PURL in a ``<dri type="mpiwg">``
element below ``/resource/meta``. The previous version of each rewritten
file is kept next to it with the suffix ``_mpiwg_dri``.
"""
import os
import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs
import re
from lxml import etree
from os.path import join, getsize

# Both log files are truncated and opened as soon as the module is loaded
# and stay open for the lifetime of the process.
errorFile = open("/tmp/addDRIErrors.txt", "w")
parseErrorFile = open("/tmp/addDRIParseErrors.txt", "w")


def addPURL(fl, purl, test=False):
    """Set the MPIWG DRI of one index.meta file to *purl*.

    If ``/resource/meta/dri[@type='mpiwg']`` exists, the text of its first
    match is replaced; otherwise a new ``dri`` element is appended to the
    first ``/resource/meta`` element. The resulting document is printed to
    stdout.

    Unless *test* is true, the original file is renamed to
    ``fl + "_mpiwg_dri"`` and the modified document is written to *fl*.

    :param fl: path of the index.meta file.
    :param purl: text to store in the ``dri`` element.
    :param test: when true, only parse, modify in memory and print.
    :returns: ``True`` on success; ``False`` if the file cannot be parsed,
        has no ``/resource/meta`` element, or cannot be rewritten. Failures
        are recorded in ``parseErrorFile`` / ``errorFile``, never raised.
    """
    try:
        tree = etree.parse(fl)
    except Exception:
        # Best effort: an unreadable or malformed file is logged and skipped.
        parseErrorFile.write("PARSE ERROR:" + fl + "\n")
        return False

    dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")
    if dris:
        dris[0].text = purl
    else:
        # No DRI yet: create a new one below the first /resource/meta.
        metas = tree.xpath("/resource/meta")
        if not metas:
            parseErrorFile.write("no resource/meta: %s \n" % fl)
            return False
        newDri = etree.Element("dri", type="mpiwg")
        newDri.text = purl
        metas[0].append(newDri)

    print(etree.tostring(tree, pretty_print=True))

    if test:
        return True

    try:
        # Keep the previous version as a backup next to the file.
        # NOTE: on POSIX os.rename silently replaces an existing backup.
        os.rename(fl, fl + "_mpiwg_dri")
        out = etree.tostring(tree, encoding="UTF-8", xml_declaration=False)
        # tostring() with an explicit encoding yields encoded bytes, so the
        # file is written in binary mode; "with" guarantees it is closed.
        with open(fl, "wb") as fo:
            fo.write(out)
    except Exception:
        # The backup (if the rename succeeded) is left in place for manual
        # recovery; the failure is only logged.
        errorFile.write(fl + "\n")
        return False
    return True


def _replacePrefix(fl, delpath, replacepath):
    """Return *fl* with a leading *delpath* replaced by *replacepath*.

    Both arguments are treated as literal strings. If *fl* does not start
    with *delpath* it is returned unchanged.
    """
    if fl.startswith(delpath):
        return replacepath + fl[len(delpath):]
    return fl


def addDriToIndexMeta(path, delpath="", replacepath="", test=False):
    """Add the MPIWG DRI to every index.meta file below *path*.

    Directories named ``pageimg`` and directories whose name starts with
    ``.`` are not descended into.

    :param path: root directory of the walk.
    :param delpath: literal prefix to strip from each file path before the
        PURL lookup.
    :param replacepath: text put in place of the stripped prefix.
    :param test: passed on to :func:`addPURL`; when true no file is changed.
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):
        # Prune via slice assignment so os.walk sees the change; removing
        # entries while iterating over the list would skip neighbours.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]

        if "index.meta" not in files:
            continue

        fl = join(root, "index.meta")
        shortPath = _replacePrefix(fl, delpath, replacepath)
        purl = md.getPurl(shortPath)
        addPURL(fl, purl, test)


if __name__ == '__main__':
    addDriToIndexMeta("/mpiwg/online/", delpath="/mpiwg/online", test=False)