Mercurial > hg > purlService
comparison harvestToPurl.py @ 3:caeede0c9464
update and redirector
author | dwinter |
---|---|
date | Thu, 01 Nov 2012 15:52:14 +0100 |
parents | 08a476cdcfea |
children | 107f13ca333b |
comparison
equal
deleted
inserted
replaced
2:fb2a3b4542a4 | 3:caeede0c9464 |
---|---|
10 | 10 |
11 import os | 11 import os |
12 from os.path import join, getsize | 12 from os.path import join, getsize |
13 import sys | 13 import sys |
14 import re | 14 import re |
15 from lxml import etree | |
15 | 16 |
16 def harvestIndexMeta(path,user,delpath="",replacepath=""): | 17 def harvestIndexMeta(path,user,delpath="",replacepath="", update=False): |
17 | 18 |
18 md = manageIndexMetaPURLs.IndexMetaPURLManager() | 19 md = manageIndexMetaPURLs.IndexMetaPURLManager() |
19 | 20 |
20 for root, dirs, files in os.walk(path): | 21 for root, dirs, files in os.walk(path): |
21 | 22 |
22 | 23 |
23 for name in files: | 24 for name in files: |
24 if name.endswith(".meta"): | 25 if name.endswith(".meta"): |
25 | |
26 fl=join(root, name) | 26 fl=join(root, name) |
27 | |
28 imagePath=createImagePath(fl,root) | |
29 imagePath=re.sub("^"+delpath,replacepath,imagePath) | |
30 | |
27 fl=re.sub("^"+delpath,replacepath,fl) #loesche den teil vom path der mir delpath beginnt | 31 fl=re.sub("^"+delpath,replacepath,fl) #loesche den teil vom path der mir delpath beginnt |
28 | 32 |
29 val,purl = md.register(fl, True, "", user) | 33 val,purl = md.register(fl, True, user=user,imagePath=imagePath,update=update) |
30 if val==manageIndexMetaPURLs.ALREADY_EXISTS: | 34 if val==manageIndexMetaPURLs.ALREADY_EXISTS: |
31 print "found %s -> %s"%(fl,purl) | 35 print "found %s -> %s"%(fl,purl) |
36 | |
37 elif val==manageIndexMetaPURLs.UPDATED: | |
38 print "updated %s -> %s"%(fl,purl) | |
32 else: | 39 else: |
33 print "added %s -> %s"%(fl,purl) | 40 print "added %s -> %s"%(fl,purl) |
41 | |
34 if 'pageimg' in dirs: | 42 if 'pageimg' in dirs: |
35 dirs.remove('pageimg') # don't visit pageimf | 43 dirs.remove('pageimg') # don't visit pageimf |
36 for dir in dirs: | 44 for dir in dirs: |
37 if dir== "pageimg": | 45 if dir== "pageimg": |
38 dirs.remove('pageimg') | 46 dirs.remove('pageimg') |
39 if dir.startswith("."): | 47 if dir.startswith("."): |
40 dirs.remove(dir) | 48 dirs.remove(dir) |
41 | 49 |
42 | 50 |
51 # erzeugt einen imagepath wenn kein texttooltag existiert | |
52 def createImagePath(path,root): | |
53 tree= etree.parse(path) | |
54 | |
55 #teste ob texttool tag, dann kein imagePath | |
56 tt =tree.xpath('//texttool') | |
57 if len(tt)>0: | |
58 return "" | |
43 | 59 |
60 | |
61 #im anderen fall, heuristic | |
62 | |
63 imageFolders=["pageimg","pages"] | |
64 | |
65 for imageFolder in imageFolders: | |
66 fl=join(root, imageFolder) | |
67 if os.path.exists(fl): # gibt es einen der folder | |
68 return fl | |
69 | |
70 return "" | |
71 | |
72 | |
73 | |
44 | 74 |
45 if __name__ == '__main__': | 75 if __name__ == '__main__': |
46 args = sys.argv[1:] | 76 args = sys.argv[1:] |
47 if not (len(args)==2 or len(args)==3 or len(args)==4): | 77 if not (len(args)==2 or len(args)==3 or len(args)==4): |
48 print "USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted" | 78 print "USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted" |
62 | 92 |
63 if not os.path.exists(path): | 93 if not os.path.exists(path): |
64 print "ERROR: path %s does not exist!"%path | 94 print "ERROR: path %s does not exist!"%path |
65 sys.exit(2) | 95 sys.exit(2) |
66 | 96 |
67 harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath) | 97 harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath,update=True) |
68 | 98 |
69 | 99 |