annotate addDriToIndexMeta.py @ 22:f748e2b684c9

bug in xml output fixed
author dwinter
date Wed, 30 Jan 2013 08:37:48 +0100
parents fad73212354b
children bcd8076ff7ec
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
1 '''
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
2 Created on 01.11.2012
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
3
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
4 @author: dwinter
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
5 '''
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
6 import os
10
1b2d74f94ca8 repackaging
dwinter
parents: 8
diff changeset
7 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs
5
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
8 import re
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
9 from lxml import etree
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
10
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
11 from os.path import join, getsize
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
12
6
f0417a01690a error handling if file renaming not possible
dwinter
parents: 5
diff changeset
# Module-level log files, opened (and truncated) when the module is loaded.
# open() replaces the Python-2-only file() builtin; behaviour is the same.
# Files that could not be renamed / rewritten.
errorFile = open("/tmp/addDRIErrors.txt","w")
# Files that could not be parsed or that have no /resource/meta element.
parseErrorFile = open("/tmp/addDRIParseErrors.txt","w")
# Files that already carry an MPIWG dri element.
alreadyExistsFile = open("/tmp/addDRIalreadyExists.txt","w")
5
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
16
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
def addPURL(fl,purl,test=False):
    """Add an MPIWG ``dri`` element containing *purl* to the XML file *fl*.

    The file is parsed with lxml.  If ``/resource/meta`` has no
    ``dri[@type='mpiwg']`` child yet, a new one with *purl* as text is
    appended to the first ``/resource/meta`` element, the resulting tree is
    printed, and (unless *test* is true) the original file is renamed to
    ``fl + "_mpiwg_dri"`` and the modified tree is written to *fl*.

    Parameters:
        fl:    path of the XML file to update.
        purl:  text to store in the new ``dri`` element.
        test:  if true, nothing is renamed or written (dry run).

    Returns:
        False if the file cannot be parsed or has no ``/resource/meta``
        element (both are logged to ``parseErrorFile``); True otherwise,
        including when the file already has an MPIWG dri (logged to
        ``alreadyExistsFile``) and when renaming/writing fails (logged to
        ``errorFile``).
    """
    try:
        tree = etree.parse(fl)
    except Exception:
        # Best effort: log unreadable or malformed files and carry on, but do
        # not swallow KeyboardInterrupt/SystemExit like a bare except would.
        parseErrorFile.write("PARSE ERROR:"+fl+"\n")
        return False

    if tree.xpath("/resource/meta/dri[@type='mpiwg']"):
        # NOTE(review): the file is only logged here and left untouched on
        # disk; an existing dri is not updated to the new purl.  Indentation
        # was lost in the reviewed listing -- confirm this early return is
        # the intended behaviour.
        alreadyExistsFile.write("%s \n"%fl)
        return True

    metas = tree.xpath("/resource/meta")
    if not metas:
        parseErrorFile.write("no resource/meta: %s \n"%fl)
        return False

    # No MPIWG dri yet: create a new one.
    newDri = etree.Element("dri",type="mpiwg")
    newDri.text = purl
    metas[0].append(newDri)

    print(etree.tostring(tree, pretty_print=True))

    if not test:
        try:
            # Serialize before touching the file system so a serialization
            # error cannot leave the original renamed with nothing written.
            out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False)
            os.rename(fl, fl+"_mpiwg_dri")
            # "wb": out is already UTF-8 encoded; the with-block guarantees
            # the file is flushed and closed (the old code never called close).
            with open(fl,"wb") as fo:
                fo.write(out)
        except Exception:
            errorFile.write(fl+"\n")
    return True
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
51
6
f0417a01690a error handling if file renaming not possible
dwinter
parents: 5
diff changeset
def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
    """Walk *path* and add a PURL-based dri to every ``index.meta`` found.

    For each ``index.meta`` file the leading *delpath* of its full path is
    replaced by *replacepath*, the result is passed to
    ``IndexMetaPURLManager.getPurl`` and the returned value is handed to
    ``addPURL`` together with the full path.

    Directories named ``pageimg`` and directories whose name starts with
    ``.`` are not descended into.

    Parameters:
        path:         root directory of the walk.
        delpath:      literal path prefix to strip from each file path
                      (paths not starting with it are left unchanged).
        replacepath:  literal text put in place of the stripped prefix.
        test:         passed through to ``addPURL`` (dry run if true).
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):
        if "index.meta" in files:
            fl = join(root, "index.meta")
            # Plain prefix replacement: unlike re.sub("^"+delpath, ...) this
            # cannot misfire on regex metacharacters in delpath, or on
            # backslashes in replacepath.
            if fl.startswith(delpath):
                shortPath = replacepath + fl[len(delpath):]
            else:
                shortPath = fl
            purl = md.getPurl(shortPath)

            addPURL(fl,purl,test)

        # Prune in place so os.walk skips these subtrees.  Rebuilding the
        # list avoids removing entries while iterating, which skipped the
        # entry following each removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
74
3ebe37d81071 addDri added
dwinter
parents:
diff changeset
if __name__ == '__main__':
    # Script entry point: dry run (test=True) over the online tree, with the
    # "/mpiwg/online" prefix stripped from each path before the PURL lookup.
    addDriToIndexMeta(
        "/mpiwg/online/",
        delpath="/mpiwg/online",
        test=True,
    )