5
|
1 '''
|
|
2 Created on 01.11.2012
|
|
3
|
|
4 @author: dwinter
|
|
5 '''
|
|
6 import os
|
|
7 import manageIndexMetaPURLs
|
|
8 import re
|
|
9 from lxml import etree
|
|
10
|
|
11 from os.path import join, getsize
|
|
12
|
6
|
# Module-level log files, opened (and truncated) at import time.
# errorFile receives the path of every file that could not be renamed or
# rewritten; parseErrorFile receives files that could not be parsed or that
# lack a /resource/meta element.
# open() replaces the Python-2-only file() builtin; behavior is the same.
errorFile = open("/tmp/addDRIErrors.txt","w")
parseErrorFile = open("/tmp/addDRIParseErrors.txt","w")
|
5
|
15
|
|
def addPURL(fl,purl,test=False):
    '''Set the mpiwg <dri> element of one XML metadata file to purl.

    The file is parsed with lxml. If /resource/meta/dri[@type='mpiwg']
    already exists its text is overwritten, otherwise a new
    <dri type="mpiwg"> element is appended to the first /resource/meta
    element. The resulting document is printed to stdout.

    Unless test is true, the original file is renamed to
    fl + "_mpiwg_dri" (kept as a backup) and the modified document is
    written to fl as UTF-8 without an XML declaration.

    @param fl: path of the XML file to update
    @param purl: text to store in the <dri> element
    @param test: if true, only print the result and leave the file untouched
    @return: False if the file could not be parsed or has no /resource/meta
             element (the problem is logged to parseErrorFile); True
             otherwise, including when the rename/write step failed (that
             failure is logged to errorFile only)
    '''
    try:
        tree = etree.parse(fl)
    except Exception:
        # Best effort: log and skip unparsable files. "except Exception"
        # instead of a bare except so Ctrl-C still stops a long batch run.
        parseErrorFile.write("PARSE ERROR:"+fl+"\n")
        return False

    dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")
    if dris:
        # An mpiwg dri already exists: just replace its value.
        dris[0].text = purl
    else:
        # Create a new one, which needs a /resource/meta parent.
        metas = tree.xpath("/resource/meta")
        if not metas:
            parseErrorFile.write("no resource/meta: %s \n"%fl)
            return False
        newDri = etree.Element("dri", type="mpiwg")
        newDri.text = purl
        metas[0].append(newDri)

    # Single parenthesized argument: valid as a Python 2 print statement
    # and as a Python 3 print() call.
    print(etree.tostring(tree, pretty_print=True))

    if not test:
        try:
            # Keep the untouched original next to the rewritten file.
            os.rename(fl, fl+"_mpiwg_dri")
            out = etree.tostring(tree, encoding="UTF-8", xml_declaration=False)
            # tostring() with an encoding returns bytes, hence "wb". The
            # with-block guarantees the file is flushed and closed (the
            # previous code referenced fo.close without calling it).
            with open(fl, "wb") as fo:
                fo.write(out)
        except Exception:
            # NOTE(review): if the rename succeeded but the write failed,
            # only the "_mpiwg_dri" backup holds the intact original.
            errorFile.write(fl+"\n")
    return True
|
|
48
|
6
|
def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
    '''Walk path and add a PURL-based dri to every "*.meta" file found.

    For each file whose name ends in ".meta", the leading delpath of its
    full path is replaced by replacepath, the result is passed to
    IndexMetaPURLManager.getPurl(), and the returned value is handed to
    addPURL() together with the file path.

    Directories named "pageimg" and directories whose name starts with "."
    are not descended into.

    @param path: root directory of the walk
    @param delpath: prefix to strip from each file path before the PURL
                    lookup; it is used as a regular expression anchored at
                    the start of the path, so regex metacharacters in it
                    are interpreted, not matched literally
    @param replacepath: replacement for the stripped prefix (re.sub
                        replacement-template rules apply)
    @param test: passed through to addPURL(); if true no file is modified
    '''
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    # Loop-invariant: compile the prefix pattern once instead of per file.
    prefixPattern = re.compile("^"+delpath)

    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".meta"):
                continue
            fl = join(root, name)
            shortPath = prefixPattern.sub(replacepath, fl)
            purl = md.getPurl(shortPath)
            addPURL(fl, purl, test)

        # Prune the walk in place. Slice assignment keeps the very list
        # object os.walk() consults, and unlike calling dirs.remove()
        # inside a loop over dirs it cannot skip the entry that follows a
        # removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]
|
|
71
|
|
if __name__ == '__main__':
    # Script entry point: process everything below /mpiwg/online/, stripping
    # the "/mpiwg/online" prefix from each file path before the PURL lookup.
    addDriToIndexMeta(
        "/mpiwg/online/",
        delpath="/mpiwg/online",
        test=False
    )
|