annotate harvestToPurl.py @ 3:caeede0c9464

update and redirector
author dwinter
date Thu, 01 Nov 2012 15:52:14 +0100
parents 08a476cdcfea
children 107f13ca333b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dwinter
parents:
diff changeset
1 '''
dwinter
parents:
diff changeset
2 Created on 31.10.2012
dwinter
parents:
diff changeset
3
dwinter
parents:
diff changeset
4 @author: dwinter
dwinter
parents:
diff changeset
5 '''
dwinter
parents:
diff changeset
6
dwinter
parents:
diff changeset
7 import manageIndexMetaPURLs
dwinter
parents:
diff changeset
8
dwinter
parents:
diff changeset
9
dwinter
parents:
diff changeset
10
dwinter
parents:
diff changeset
11 import os
dwinter
parents:
diff changeset
12 from os.path import join, getsize
dwinter
parents:
diff changeset
13 import sys
dwinter
parents:
diff changeset
14 import re
3
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
15 from lxml import etree
0
dwinter
parents:
diff changeset
16
3
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
def harvestIndexMeta(path,user,delpath="",replacepath="", update=False):
    """Walk *path* and register every ``*.meta`` file with the PURL manager.

    For each file whose name ends in ``.meta`` an image path is derived via
    ``createImagePath``; then the leading ``delpath`` of both the file path
    and the image path is replaced by ``replacepath`` and the result is
    passed to ``IndexMetaPURLManager.register``. One line per file is
    printed, saying whether the entry was found, updated or added.

    Directories named ``pageimg`` and directories whose name starts with a
    dot are not descended into.

    Parameters:
        path        -- root directory of the walk.
        user        -- forwarded to ``register`` as ``user``.
        delpath     -- literal path prefix to strip (default: nothing).
        replacepath -- text put in place of the stripped prefix.
        update      -- forwarded to ``register`` as ``update``.

    NOTE: ``delpath`` is matched as a literal prefix. Earlier revisions fed
    it unescaped into a regular expression, so characters such as ``.`` or
    ``+`` in a path could match unintended text (or fail to match at all).
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    def swap_prefix(value):
        # Replace the part of the path that begins with delpath. Paths that
        # do not start with delpath are returned untouched.
        if value.startswith(delpath):
            return replacepath + value[len(delpath):]
        return value

    for root, dirs, files in os.walk(path):

        for name in files:
            if not name.endswith(".meta"):
                continue

            fl = join(root, name)

            # The image path is computed from the real on-disk location,
            # i.e. before the prefix of fl is rewritten.
            imagePath = swap_prefix(createImagePath(fl, root))
            fl = swap_prefix(fl)

            val, purl = md.register(fl, True, user=user, imagePath=imagePath, update=update)
            if val == manageIndexMetaPURLs.ALREADY_EXISTS:
                print("found %s -> %s" % (fl, purl))
            elif val == manageIndexMetaPURLs.UPDATED:
                print("updated %s -> %s" % (fl, purl))
            else:
                print("added %s -> %s" % (fl, purl))

        # Prune in place so os.walk skips page-image and hidden directories.
        # Building a new list (instead of calling remove() while looping
        # over dirs) guarantees that no entry is skipped.
        dirs[:] = [d for d in dirs if d != "pageimg" and not d.startswith(".")]
dwinter
parents:
diff changeset
49
dwinter
parents:
diff changeset
50
3
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
# Creates an image path when no texttool tag exists.
def createImagePath(path,root):
    """Return an image folder for the metadata file *path*, or ``""``.

    The file at *path* is parsed as XML. If it contains at least one
    ``texttool`` element, no image path is produced and ``""`` is returned.
    Otherwise the first existing folder among ``pageimg`` and ``pages``
    below *root* is returned; if neither exists the result is ``""``.
    """
    document = etree.parse(path)

    # A texttool tag is present: no image path in that case.
    if document.xpath('//texttool'):
        return ""

    # Otherwise fall back to a heuristic: take the first of the well-known
    # image folder names that actually exists next to the metadata file.
    candidates = (os.path.join(root, folder) for folder in ("pageimg", "pages"))
    return next((candidate for candidate in candidates if os.path.exists(candidate)), "")
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
71
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
72
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
73
0
dwinter
parents:
diff changeset
74
dwinter
parents:
diff changeset
if __name__ == '__main__':
    # Command line entry point:
    #   python harvestToPurl.py path user [pathPrefixToDelete [replacedeleted]]
    args = sys.argv[1:]
    argCount = len(args)

    if argCount not in (2, 3, 4):
        print("USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted")
        sys.exit(2)

    path, user = args[0], args[1]

    # Both optional arguments default to the empty string when omitted.
    delpath = args[2] if argCount >= 3 else ""
    replacepath = args[3] if argCount == 4 else ""

    if not os.path.exists(path):
        print("ERROR: path %s does not exist!" % path)
        sys.exit(2)

    # Entries are always harvested in update mode when run as a script.
    harvestIndexMeta(path, user, delpath=delpath, replacepath=replacepath, update=True)
0
dwinter
parents:
diff changeset
98
dwinter
parents:
diff changeset
99