comparison harvestToPurl.py @ 3:caeede0c9464

update and redirector
author dwinter
date Thu, 01 Nov 2012 15:52:14 +0100
parents 08a476cdcfea
children 107f13ca333b
comparison
equal deleted inserted replaced
2:fb2a3b4542a4 3:caeede0c9464
10 10
11 import os 11 import os
12 from os.path import join, getsize 12 from os.path import join, getsize
13 import sys 13 import sys
14 import re 14 import re
15 from lxml import etree
15 16
def _replacePathPrefix(value, delpath, replacepath):
    """Return *value* with a leading literal *delpath* swapped for *replacepath*.

    The prefix is compared as plain text, not as a regular expression, so
    characters such as "." or "+" in a directory name only match themselves
    and backslashes in *replacepath* are kept verbatim.  A *value* that does
    not start with *delpath* is returned unchanged.  An empty *delpath*
    matches every value, so *replacepath* is simply prepended.
    """
    if value.startswith(delpath):
        return replacepath + value[len(delpath):]
    return value


def harvestIndexMeta(path,user,delpath="",replacepath="", update=False):
    """Walk *path* and register every ``*.meta`` file with the PURL manager.

    For each file whose name ends in ".meta" an image path is derived with
    createImagePath(), the leading *delpath* of both paths is replaced by
    *replacepath*, and the result is passed to
    manageIndexMetaPURLs.IndexMetaPURLManager.register().  One line per file
    is printed, telling whether the entry was found, updated or added.

    Parameters:
        path        -- directory tree to walk
        user        -- passed through to register() as ``user``
        delpath     -- literal path prefix to strip from the registered paths
        replacepath -- text put in place of the stripped prefix
        update      -- passed through to register() as ``update``

    Directories named "pageimg" and directories whose name starts with "."
    are not descended into.
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):

        for name in files:
            if not name.endswith(".meta"):
                continue

            fl = join(root, name)

            # createImagePath needs the real on-disk locations, so it is
            # called before any prefix rewriting takes place.
            imagePath = createImagePath(fl, root)
            if imagePath:
                # An empty imagePath means "no image folder"; leave it empty
                # instead of turning it into a bare replacepath.
                imagePath = _replacePathPrefix(imagePath, delpath, replacepath)

            # strip the part of the path that starts with delpath
            fl = _replacePathPrefix(fl, delpath, replacepath)

            val, purl = md.register(fl, True, user=user, imagePath=imagePath, update=update)

            # Parenthesised single-argument print gives the same output
            # whether print is a statement or a function.
            if val == manageIndexMetaPURLs.ALREADY_EXISTS:
                print("found %s -> %s" % (fl, purl))
            elif val == manageIndexMetaPURLs.UPDATED:
                print("updated %s -> %s" % (fl, purl))
            else:
                print("added %s -> %s" % (fl, purl))

        # Prune in place (slice assignment) so os.walk skips these
        # directories.  Building a new list avoids removing entries from
        # `dirs` while iterating over it, which skipped the entry following
        # each removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]
41 49
42 50
def createImagePath(path,root):
    """Return an image folder path for the metadata file, or "" if none applies.

    The XML file at *path* is parsed first.  If it contains a ``texttool``
    element anywhere, no image path is produced and "" is returned.
    Otherwise a heuristic is used: the first of the sub-folders "pageimg"
    and "pages" that exists below *root* is returned.  If neither exists
    the result is "".
    """
    document = etree.parse(path)

    # a texttool tag is present -> no imagePath
    if document.xpath('//texttool'):
        return ""

    # otherwise fall back to the heuristic: first existing candidate folder
    candidates = (join(root, folderName) for folderName in ("pageimg", "pages"))
    return next((candidate for candidate in candidates
                 if os.path.exists(candidate)), "")
71
72
73
44 74
45 if __name__ == '__main__': 75 if __name__ == '__main__':
46 args = sys.argv[1:] 76 args = sys.argv[1:]
47 if not (len(args)==2 or len(args)==3 or len(args)==4): 77 if not (len(args)==2 or len(args)==3 or len(args)==4):
48 print "USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted" 78 print "USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted"
62 92
63 if not os.path.exists(path): 93 if not os.path.exists(path):
64 print "ERROR: path %s does not exist!"%path 94 print "ERROR: path %s does not exist!"%path
65 sys.exit(2) 95 sys.exit(2)
66 96
67 harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath) 97 harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath,update=True)
68 98
69 99