annotate harvestToPurl.py @ 12:0287aed01f2b

config and minor bugs
author dwinter
date Tue, 06 Nov 2012 08:41:18 +0100
parents 1b2d74f94ca8
children be8640c08d99
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dwinter
parents:
diff changeset
1 '''
dwinter
parents:
diff changeset
2 Created on 31.10.2012
dwinter
parents:
diff changeset
3
dwinter
parents:
diff changeset
4 @author: dwinter
dwinter
parents:
diff changeset
5 '''
dwinter
parents:
diff changeset
6
10
1b2d74f94ca8 repackaging
dwinter
parents: 4
diff changeset
7 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs
0
dwinter
parents:
diff changeset
8
dwinter
parents:
diff changeset
9
dwinter
parents:
diff changeset
10
dwinter
parents:
diff changeset
11 import os
dwinter
parents:
diff changeset
12 from os.path import join, getsize
dwinter
parents:
diff changeset
13 import sys
dwinter
parents:
diff changeset
14 import re
3
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
15 from lxml import etree
0
dwinter
parents:
diff changeset
16
3
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
def _replacePathPrefix(value, delpath, replacepath):
    """Return *value* with a leading literal *delpath* swapped for *replacepath*.

    The prefix is compared as plain text, so characters such as "." or "+"
    in a path are not interpreted as regular-expression syntax. A value that
    does not start with *delpath* is returned unchanged.
    """
    if value.startswith(delpath):
        return replacepath + value[len(delpath):]
    return value


def harvestIndexMeta(path,user,delpath="",replacepath="", update=False):
    """Walk *path* and register every ``*.meta`` file with the PURL manager.

    For each file whose name ends in ".meta" an image path is derived with
    createImagePath(), the *delpath* prefix of both the file path and the
    image path is replaced by *replacepath*, and the result is handed to
    IndexMetaPURLManager.register(). One line describing the outcome is
    printed per file.

    Parameters:
        path        -- directory tree to scan.
        user        -- passed through to register() as ``user``.
        delpath     -- literal path prefix to strip before registering.
        replacepath -- text put in place of the stripped prefix.
        update      -- passed through to register() as ``update``.

    Directories named "pageimg" and directories whose name starts with "."
    or ":" are not descended into.
    """
    md = manageIndexMetaPURLs.IndexMetaPURLManager()

    for root, dirs, files in os.walk(path):
        for name in files:
            if not name.endswith(".meta"):
                continue

            fl = join(root, name)

            # The image path is looked up with the real on-disk path; the
            # prefix is only rewritten afterwards.
            imagePath = _replacePathPrefix(createImagePath(fl, root), delpath, replacepath)
            fl = _replacePathPrefix(fl, delpath, replacepath)

            val, purl = md.register(fl, True, user=user, imagePath=imagePath, update=update)

            # Printing is best effort (presumably guarding against paths that
            # cannot be encoded for the console); a failure here must not
            # abort the harvest.
            try:
                if val == manageIndexMetaPURLs.ALREADY_EXISTS:
                    print("found %s -> %s" % (fl, purl))
                elif val == manageIndexMetaPURLs.UPDATED:
                    print("updated %s -> %s" % (fl, purl))
                else:
                    print("added %s -> %s" % (fl, purl))
            except Exception:
                print("cannot print: %s" % purl)

        # Prune in place so os.walk skips these directories. A slice
        # assignment is used instead of remove() inside a loop over dirs,
        # which would skip the entry following each removed one.
        dirs[:] = [subdir for subdir in dirs
                   if subdir != "pageimg" and not subdir.startswith((".", ":"))]
107f13ca333b try except added
dwinter
parents: 3
diff changeset
55
0
dwinter
parents:
diff changeset
56
3
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
57 # erzeugt einen imagepath wenn kein texttooltag existiert
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
def createImagePath(path,root):
    """Guess the image folder that belongs to the meta file *path*.

    Parameters:
        path -- the meta file to parse as XML.
        root -- the directory in which image folders are looked for.

    Returns "" when the file cannot be parsed or when it contains a
    ``texttool`` element. Otherwise returns the first of ``root/pageimg``
    and ``root/pages`` that exists, or "" if neither does.
    """
    print("parsing: %s" % path)
    try:
        tree = etree.parse(path)
    except Exception:
        # Best effort: an unreadable or malformed file simply yields no image
        # path. Catching Exception (not everything) keeps Ctrl-C working.
        print("cannot parse %s" % path)
        return ""

    # If a texttool tag is present, no image path is generated.
    if tree.xpath('//texttool'):
        return ""

    # Otherwise fall back to a heuristic: use the first known folder name
    # that exists below root.
    for imageFolder in ("pageimg", "pages"):
        candidate = join(root, imageFolder)
        if os.path.exists(candidate):
            return candidate

    return ""
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
82
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
83
caeede0c9464 update and redirector
dwinter
parents: 0
diff changeset
84
0
dwinter
parents:
diff changeset
85
dwinter
parents:
diff changeset
if __name__ == '__main__':
    # Command line: path user [pathPrefixToDelete [replacedeleted]]
    args = sys.argv[1:]
    if len(args) not in (2, 3, 4):
        print("USAGE: python harvestToPurl.py path user (optional)pathPrefixToDelete (optional)replacedeleted")
        sys.exit(2)

    path, user = args[0], args[1]

    # Both optional arguments default to the empty string.
    delpath = args[2] if len(args) >= 3 else ""
    replacepath = args[3] if len(args) == 4 else ""

    if not os.path.exists(path):
        print("ERROR: path %s does not exist!"%path)
        sys.exit(2)

    # The command-line entry point always runs the harvest with update=True.
    harvestIndexMeta(path,user,delpath=delpath,replacepath=replacepath,update=True)
0
dwinter
parents:
diff changeset
109
4
107f13ca333b try except added
dwinter
parents: 3
diff changeset
110