Mercurial > hg > purlService
comparison addDriToIndexMeta.py @ 30:bcd8076ff7ec
random selection of entries
bug fixes
author | dwinter |
---|---|
date | Wed, 05 Jun 2013 17:37:09 +0200 |
parents | f748e2b684c9 |
children | 0190f49bce88 |
comparison
equal
deleted
inserted
replaced
29:7027fbf1d141 | 30:bcd8076ff7ec |
---|---|
5 ''' | 5 ''' |
6 import os | 6 import os |
7 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs | 7 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs |
8 import re | 8 import re |
9 from lxml import etree | 9 from lxml import etree |
10 import sys | |
10 | 11 |
11 from os.path import join, getsize | 12 from os.path import join, getsize |
12 | 13 |
13 errorFile = file("/tmp/addDRIErrors.txt","w") | 14 errorFile = file("/tmp/addDRIErrors.txt","w") |
14 parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") | 15 parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") |
15 alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w") | 16 alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w") |
16 | 17 |
18 | |
19 def correctAuthor(tree): | |
20 """ersetzt in den autor felder "\r" durch ;""" | |
21 | |
22 | |
23 authors = tree.xpath("/resource/meta/bib/author") | |
24 for author in authors: | |
25 | |
26 if author.text is not None: | |
27 splitted =author.text.split("\n") | |
28 txt = "; ".join(splitted) | |
29 | |
30 author.text=txt | |
31 | |
32 | |
33 | |
17 def addPURL(fl,purl,test=False): | 34 def addPURL(fl,purl,test=False): |
18 try: | 35 try: |
19 tree = etree.parse(fl) | 36 tree = etree.parse(fl) |
20 except: | 37 except: |
21 parseErrorFile.write("PARSE ERROR:"+fl+"\n") | 38 parseErrorFile.write("PARSE ERROR:"+fl+"\n") |
22 return False | 39 return False |
40 | |
23 dris = tree.xpath("/resource/meta/dri[@type='mpiwg']") | 41 dris = tree.xpath("/resource/meta/dri[@type='mpiwg']") |
42 | |
43 | |
44 correctAuthor(tree) | |
45 | |
46 | |
24 | 47 |
25 if len(dris)==0: # erzeuge neu | 48 if len(dris)==0: # erzeuge neu |
26 newDri = etree.Element("dri",type="mpiwg") | 49 newDri = etree.Element("dri",type="mpiwg") |
27 newDri.text=purl | 50 newDri.text=purl |
28 metas=tree.xpath("/resource/meta") | 51 metas=tree.xpath("/resource/meta") |
32 else: | 55 else: |
33 metas[0].append(newDri) | 56 metas[0].append(newDri) |
34 else: | 57 else: |
35 dris[0].text=purl | 58 dris[0].text=purl |
36 alreadyExistsFile.write("%s \n"%fl) | 59 alreadyExistsFile.write("%s \n"%fl) |
37 return True | 60 #return True |
38 | 61 |
39 print etree.tostring(tree, pretty_print=True) | 62 print etree.tostring(tree, pretty_print=True) |
40 | 63 |
64 | |
41 if not test: | 65 if not test: |
42 try: | 66 try: |
67 | |
43 os.rename(fl, fl+"_mpiwg_dri") | 68 os.rename(fl, fl+"_mpiwg_dri") |
44 out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False) | 69 out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False) |
45 fo = file(fl,"w") | 70 fo = file(fl,"w") |
46 fo.write(out) | 71 fo.write(out) |
47 fo.close | 72 fo.close |
48 except: | 73 except: |
74 | |
75 print sys.exc_info()[0] | |
76 print sys.exc_info()[1] | |
49 errorFile.write(fl+"\n") | 77 errorFile.write(fl+"\n") |
50 return True | 78 return True |
51 | 79 |
52 def addDriToIndexMeta(path,delpath="",replacepath="",test=False): | 80 def addDriToIndexMeta(path,delpath="",replacepath="",test=False): |
53 | 81 |
71 dirs.remove('pageimg') | 99 dirs.remove('pageimg') |
72 if dir.startswith("."): | 100 if dir.startswith("."): |
73 dirs.remove(dir) | 101 dirs.remove(dir) |
74 | 102 |
75 if __name__ == '__main__': | 103 if __name__ == '__main__': |
76 addDriToIndexMeta("/mpiwg/online/",delpath="/mpiwg/online",test=True) | 104 addDriToIndexMeta("/mpiwg/online/permanent/vlp",delpath="/mpiwg/online",test=False) |