comparison addDriToIndexMeta.py @ 30:bcd8076ff7ec

random selection of entries bug fixes
author dwinter
date Wed, 05 Jun 2013 17:37:09 +0200
parents f748e2b684c9
children 0190f49bce88
comparison
equal deleted inserted replaced
29:7027fbf1d141 30:bcd8076ff7ec
5 ''' 5 '''
6 import os 6 import os
7 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs 7 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs
8 import re 8 import re
9 from lxml import etree 9 from lxml import etree
10 import sys
10 11
11 from os.path import join, getsize 12 from os.path import join, getsize
12 13
13 errorFile = file("/tmp/addDRIErrors.txt","w") 14 errorFile = file("/tmp/addDRIErrors.txt","w")
14 parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") 15 parseErrorFile = file("/tmp/addDRIParseErrors.txt","w")
15 alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w") 16 alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w")
16 17
18
def correctAuthor(tree):
    """Collapse multi-line author entries into one "; "-separated line.

    Every element matched by ``/resource/meta/bib/author`` whose text is
    not ``None`` is rewritten in place: its text is split at line breaks
    and the pieces are joined again with "; ".

    All three line-break forms (CR LF, lone CR, lone LF) count as a
    separator, so no stray carriage return is left behind in the joined
    text.  For text that only contains LF the result is the same as a
    plain split on LF.

    :param tree: object offering an ``xpath(query)`` method that returns
        elements with a ``text`` attribute (addPURL passes the result of
        ``etree.parse``).
    :returns: None; the matched elements are modified in place.
    """
    for author in tree.xpath("/resource/meta/bib/author"):
        if author.text is None:
            # Empty <author/> element: nothing to join.
            continue

        # The CR LF alternative comes first so that the pair is consumed
        # as one separator instead of producing an empty piece between
        # the CR and the LF.
        pieces = re.split(r"\r\n|\r|\n", author.text)
        author.text = "; ".join(pieces)
31
32
33
17 def addPURL(fl,purl,test=False): 34 def addPURL(fl,purl,test=False):
18 try: 35 try:
19 tree = etree.parse(fl) 36 tree = etree.parse(fl)
20 except: 37 except:
21 parseErrorFile.write("PARSE ERROR:"+fl+"\n") 38 parseErrorFile.write("PARSE ERROR:"+fl+"\n")
22 return False 39 return False
40
23 dris = tree.xpath("/resource/meta/dri[@type='mpiwg']") 41 dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")
42
43
44 correctAuthor(tree)
45
46
24 47
25 if len(dris)==0: # erzeuge neu 48 if len(dris)==0: # erzeuge neu
26 newDri = etree.Element("dri",type="mpiwg") 49 newDri = etree.Element("dri",type="mpiwg")
27 newDri.text=purl 50 newDri.text=purl
28 metas=tree.xpath("/resource/meta") 51 metas=tree.xpath("/resource/meta")
32 else: 55 else:
33 metas[0].append(newDri) 56 metas[0].append(newDri)
34 else: 57 else:
35 dris[0].text=purl 58 dris[0].text=purl
36 alreadyExistsFile.write("%s \n"%fl) 59 alreadyExistsFile.write("%s \n"%fl)
37 return True 60 #return True
38 61
39 print etree.tostring(tree, pretty_print=True) 62 print etree.tostring(tree, pretty_print=True)
40 63
64
41 if not test: 65 if not test:
42 try: 66 try:
67
43 os.rename(fl, fl+"_mpiwg_dri") 68 os.rename(fl, fl+"_mpiwg_dri")
44 out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False) 69 out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False)
45 fo = file(fl,"w") 70 fo = file(fl,"w")
46 fo.write(out) 71 fo.write(out)
47 fo.close 72 fo.close
48 except: 73 except:
74
75 print sys.exc_info()[0]
76 print sys.exc_info()[1]
49 errorFile.write(fl+"\n") 77 errorFile.write(fl+"\n")
50 return True 78 return True
51 79
52 def addDriToIndexMeta(path,delpath="",replacepath="",test=False): 80 def addDriToIndexMeta(path,delpath="",replacepath="",test=False):
53 81
71 dirs.remove('pageimg') 99 dirs.remove('pageimg')
72 if dir.startswith("."): 100 if dir.startswith("."):
73 dirs.remove(dir) 101 dirs.remove(dir)
74 102
75 if __name__ == '__main__': 103 if __name__ == '__main__':
76 addDriToIndexMeta("/mpiwg/online/",delpath="/mpiwg/online",test=True) 104 addDriToIndexMeta("/mpiwg/online/permanent/vlp",delpath="/mpiwg/online",test=False)