0
|
1 '''
|
|
2 Created on 31.10.2012
|
|
3
|
|
4 @author: dwinter
|
|
5 '''
|
|
6
|
|
7 import manageIndexMetaPURLs
|
|
8
|
|
9
|
|
10
|
|
11 import os
|
|
12 from os.path import join, getsize
|
|
13 import sys
|
|
14 import re
|
3
|
15 from lxml import etree
|
0
|
16
|
3
|
def _replacePathPrefix(text, prefix, replacement):
    """Return *text* with a leading literal *prefix* swapped for *replacement*.

    The prefix is compared literally (no regex interpretation), so paths
    containing characters such as ``.``, ``+`` or ``(`` are handled correctly.
    *text* is returned unchanged when it does not start with *prefix*.
    """
    if text.startswith(prefix):
        return replacement + text[len(prefix):]
    return text


def harvestIndexMeta(path, user, delpath="", replacepath="", update=False):
    """Walk *path* and register every ``*.meta`` file with the PURL manager.

    For each file whose name ends in ``.meta`` an image path is derived via
    ``createImagePath``; then both the file path and the image path have the
    leading *delpath* replaced by *replacepath* before they are handed to
    ``IndexMetaPURLManager.register``. One status line is printed per file.

    Directories named ``pageimg`` and directories whose name starts with a
    dot are not descended into.

    :param path: root directory of the tree to walk.
    :param user: forwarded to ``register`` as the ``user`` keyword.
    :param delpath: literal path prefix to strip from the registered paths.
    :param replacepath: text put in place of the stripped prefix.
    :param update: forwarded to ``register`` as the ``update`` keyword.
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".meta"):
                continue

            fl = join(root, name)

            # The image path has to be derived from the real on-disk location,
            # i.e. before the prefix of fl is rewritten below.
            imagePath = createImagePath(fl, root)
            imagePath = _replacePathPrefix(imagePath, delpath, replacepath)

            # Remove the part of the path that starts with delpath.
            fl = _replacePathPrefix(fl, delpath, replacepath)

            val, purl = md.register(fl, True, user=user,
                                    imagePath=imagePath, update=update)
            if val == manageIndexMetaPURLs.ALREADY_EXISTS:
                print("found %s -> %s" % (fl, purl))
            elif val == manageIndexMetaPURLs.UPDATED:
                print("updated %s -> %s" % (fl, purl))
            else:
                print("added %s -> %s" % (fl, purl))

        # Prune in place so os.walk does not visit page image folders or
        # hidden directories. Slice assignment is used instead of calling
        # remove() while looping over dirs, which skips the entry that
        # follows each removed one.
        dirs[:] = [d for d in dirs
                   if d != "pageimg" and not d.startswith(".")]
|
|
49
|
|
50
|
3
|
# Builds an image path when no texttool tag exists.
def createImagePath(path, root):
    """Return the image folder to use for the metadata file at *path*.

    The file is parsed as XML. If it contains any ``texttool`` element, an
    empty string is returned. Otherwise the first of the folders ``pageimg``
    and ``pages`` that exists below *root* is returned, or an empty string
    when neither exists.

    :param path: path of the XML metadata file to parse.
    :param root: directory in which the image folders are looked up.
    """
    document = etree.parse(path)

    # A texttool tag is present: no image path is produced.
    if document.xpath('//texttool'):
        return ""

    # Otherwise fall back to a heuristic: take the first known image folder
    # that actually exists below root.
    candidates = (join(root, folderName) for folderName in ("pageimg", "pages"))
    return next((candidate for candidate in candidates
                 if os.path.exists(candidate)), "")
|
|
71
|
|
72
|
|
73
|
0
|
74
|
|
if __name__ == '__main__':
    # Command line: path user [pathPrefixToDelete [replacement]]
    cli_args = sys.argv[1:]
    if len(cli_args) not in (2, 3, 4):
        print("USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted")
        sys.exit(2)

    path, user = cli_args[0], cli_args[1]

    # The third and fourth arguments are optional and default to "".
    delpath = cli_args[2] if len(cli_args) >= 3 else ""
    replacepath = cli_args[3] if len(cli_args) == 4 else ""

    if not os.path.exists(path):
        print("ERROR: path %s does not exist!" % path)
        sys.exit(2)

    # Existing entries are always updated when run from the command line.
    harvestIndexMeta(path, user, delpath=delpath,
                     replacepath=replacepath, update=True)
|
0
|
98
|
|
99 |