annotate MpdlXmlTextServer.py @ 517:aaacdf551f6f

remove global info from processPageInfo.
author casties
date Mon, 05 Mar 2012 19:11:59 +0100
parents 7d7b639d7be7
children 91051b36b9cc
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
1 from OFS.SimpleItem import SimpleItem
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
3
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
4 import xml.etree.ElementTree as ET
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
5
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
6 import re
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
7 import logging
134
6a33aa624ba4 fixed more oopsies
casties
parents: 133
diff changeset
8 import urllib
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
9 import urlparse
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
10 import base64
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
11
458
48b135b089c8 more renovation
casties
parents: 456
diff changeset
12 from SrvTxtUtils import getInt, getText, getHttpData
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
13
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def serialize(node):
    """Return node serialized as an XML snippet without XML declaration.

    node -- ElementTree element to serialize.

    ET.tostring() is asked for UTF-8 output and may put a leading
    <?xml ...?> declaration in front of the element. The declaration and
    the whitespace following it are removed. The result is always a
    native str.
    """
    xml = ET.tostring(node, 'UTF-8')
    if not isinstance(xml, str):
        # tostring() can hand back bytes (Python 3); decode so that the
        # str prefix test below works and callers get a native string.
        xml = xml.decode('utf-8')

    # snip off XML declaration
    if xml.startswith('<?xml'):
        end = xml.find('?>')
        if end != -1:
            # skip the '?>' itself and the line break written after it
            return xml[end + 2:].lstrip()

    return xml
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
23
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
24
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
25 class MpdlXmlTextServer(SimpleItem):
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
26 """TextServer implementation for MPDL-XML eXist server"""
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
27 meta_type="MPDL-XML TextServer"
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
28
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
29 manage_options=(
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
30 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
31 )+SimpleItem.manage_options
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
32
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
33 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
34
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Store id, title, request timeout and base URL of the text server.

    A given serverName takes precedence over serverUrl: the base URL is
    then built from the host name and the fixed path /mpdl/interface/.
    """
    if serverName is not None:
        baseUrl = "http://%s/mpdl/interface/"%serverName
    else:
        baseUrl = serverUrl

    self.serverUrl = baseUrl
    self.timeout = timeout
    self.title = title
    self.id = id
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
44
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getHttpData(self, url, data=None):
    """returns the result of an HTTP request for url with optional data.

    Delegates to the getHttpData function imported from SrvTxtUtils and
    adds the timeout configured on this object.
    """
    timeout = self.timeout
    return getHttpData(url, data, timeout=timeout)
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
48
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getServerData(self, method, data=None):
    """returns the text server's result for method called with data.

    method is appended to the configured serverUrl to form the request
    URL; the request uses the timeout configured on this object.
    """
    return getHttpData(self.serverUrl + method, data, timeout=self.timeout)
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
53
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
54
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
def getPlacesOnPage(self, docinfo=None, pn=None):
    """Returns list of GIS places of page pn.

    docinfo -- dict holding the document's 'textURLPath'.
    pn -- page number passed on to the text server.

    Returns a list of {'id': ..., 'name': ...} dicts, one for each
    <place> element found below <resultPage> in the server result, or
    None if docinfo is missing or has no textURLPath.
    """
    # docinfo defaults to None, so guard before calling .get() on it
    docpath = docinfo.get('textURLPath', None) if docinfo else None
    if not docpath:
        return None

    # NOTE(review): docpath and pn go into the query string unescaped --
    # confirm that the values never need URL-quoting.
    query = "document=%s&xpath=//place&pn=%s"%(docpath,pn)
    dom = ET.fromstring(self.getServerData("xpath.xql", query))
    return [{'id': el.get("id"), 'name': el.text}
            for el in dom.findall(".//resultPage/place")]
307
ec5e920a61e6 *** empty log message ***
abukhman
parents: 306
diff changeset
72
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
73
517
aaacdf551f6f remove global info from processPageInfo.
casties
parents: 516
diff changeset
def getTextInfo(self, mode='', docinfo=None):
    """reads document info, including page concordance, from text server.

    mode -- value sent as the 'info' parameter of doc-info.xql.
    docinfo -- dict that has to contain 'textURLPath'; updated in place.

    For the matching child tags of <document> in the server result this
    sets 'numTextPages' (only if positive), 'numFigureEntries',
    'numTocEntries', 'numPlaces' and 'pageNumbers' in docinfo.
    Returns docinfo, unchanged if there is no textURLPath or no
    <document> tag.
    """
    logging.debug("getTextInfo")
    #TODO: check cached info
    # docinfo defaults to None, so guard before calling .get() on it
    docpath = docinfo.get('textURLPath', None) if docinfo is not None else None
    if docpath is None:
        logging.error("getTextInfo: no textURLPath!")
        return docinfo

    # we need to set a result set size
    pagesize = 10000
    firstPage = 1
    # fetch docinfo
    query = "document=%s&info=%s&pageSize=%s&pn=%s"%(docpath,mode,pagesize,firstPage)
    dom = ET.fromstring(self.getServerData("doc-info.xql", query))
    # all info in tag <document>
    doc = dom.find("document")
    if doc is None:
        logging.error("getTextInfo: unable to find document-tag!")
        return docinfo

    # count tags whose value is stored as-is under another key
    countKeys = {'countFigureEntries': 'numFigureEntries',
                 'countTocEntries': 'numTocEntries',
                 'countPlaces': 'numPlaces'}

    # go through all child elements
    for tag in doc:
        name = tag.tag
        # numTextPages
        if name == 'countPages':
            numPages = getInt(tag.text)
            if numPages > 0:
                docinfo['numTextPages'] = numPages

        # numFigureEntries, numTocEntries, numPlaces
        elif name in countKeys:
            docinfo[countKeys[name]] = getInt(tag.text)

        # pageNumbers
        elif name == 'pageNumbers':
            # contains tags with page numbers
            # <pn><n>4</n><no>4</no><non/></pn>
            # n=scan number, no=original page no, non=normalized original page no
            # pageNumbers is a dict indexed by scan number
            pages = {}
            for pnTag in tag:
                page = {}
                scanNo = 0
                for part in pnTag:
                    if part.tag == 'n':
                        scanNo = getInt(part.text)
                        page['n'] = scanNo
                    elif part.tag in ('no', 'non'):
                        page[part.tag] = part.text

                # entries without a positive scan number are dropped
                if scanNo > 0:
                    pages[scanNo] = page

            docinfo['pageNumbers'] = pages
            #logging.debug("got pageNumbers=%s"%repr(pages))

        # toc
        elif name == 'toc':
            # contains tags with table of contents
            # TODO: implement
            pass

    return docinfo
7d7b639d7be7 add methods to use doc-info.xql.
casties
parents: 513
diff changeset
148
7d7b639d7be7 add methods to use doc-info.xql.
casties
parents: 513
diff changeset
149
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
def processPageInfo(self, dom, docinfo, pageinfo):
    """copies page metadata from the pageMeta div of dom into pageinfo.

    Looks at the first <div> child of dom, which has to carry the class
    'pageMeta'; otherwise an error is logged and nothing is stored.
    The text of each of its children with class pageNumberOrig,
    pageNumberOrigNorm or pageHeaderTitle is stored in pageinfo under
    the class name. docinfo is not used here.
    """
    wanted = ('pageNumberOrig', 'pageNumberOrigNorm', 'pageHeaderTitle')
    # assume first second level div is pageMeta
    metadiv = dom.find("div")
    if metadiv is not None and metadiv.get('class', '') == 'pageMeta':
        for child in metadiv:
            cls = child.get('class')
            if cls in wanted:
                pageinfo[cls] = child.text
    else:
        logging.error("processPageInfo: pageMeta div not found!")

    #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo))
    return
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
176
388
0265ab93716a *** empty log message ***
abukhman
parents: 386
diff changeset
177
471
415a7026eeda split viewMode in viewMode and viewType
casties
parents: 469
diff changeset
178 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
179 """returns single page from fulltext"""
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
180
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
181 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
182 # check for cached text -- but ideally this shouldn't be called twice
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
183 if pageinfo.has_key('textPage'):
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
184 logging.debug("getTextPage: using cached text")
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
185 return pageinfo['textPage']
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
186
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
187 docpath = docinfo['textURLPath']
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
188 # just checking
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
189 if pageinfo['current'] != pn:
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
190 logging.warning("getTextPage: current!=pn!")
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
191
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
192 # stuff for constructing full urls
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
193 selfurl = docinfo['viewerUrl']
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
194 textParams = {'document': docpath,
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
195 'pn': pn}
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
196 if 'characterNormalization' in pageinfo:
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
197 textParams['characterNormalization'] = pageinfo['characterNormalization']
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
198
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
199 if not mode:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
200 # default is dict
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
201 mode = 'text'
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
202
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
203 modes = mode.split(',')
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
204 # check for multiple layers
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
205 if len(modes) > 1:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
206 logging.debug("getTextPage: more than one mode=%s"%mode)
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
207
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
208 # search mode
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
209 if 'search' in modes:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
210 # add highlighting
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
211 highlightQuery = pageinfo.get('highlightQuery', None)
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
212 if highlightQuery:
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
213 textParams['highlightQuery'] = highlightQuery
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
214 textParams['highlightElement'] = pageinfo.get('highlightElement', '')
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
215 textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '')
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
216
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
217 # ignore mode in the following
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
218 modes.remove('search')
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
219
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
220 # other modes don't combine
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
221 if 'dict' in modes:
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
222 # dict is called textPollux in the backend
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
223 textmode = 'textPollux'
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
224 elif len(modes) == 0:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
225 # text is default mode
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
226 textmode = 'text'
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
227 else:
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
228 # just take first mode
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
229 textmode = modes[0]
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
230
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
231 textParams['mode'] = textmode
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
232
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
233 # fetch the page
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
234 pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams))
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
235 dom = ET.fromstring(pagexml)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
236 # extract additional info
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
237 self.processPageInfo(dom, docinfo, pageinfo)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
238 # page content is in <div class="pageContent">
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
239 pagediv = None
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
240 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
469
15394486ab75 working with new templates
casties
parents: 465
diff changeset
241 # so we look at the second level divs
15394486ab75 working with new templates
casties
parents: 465
diff changeset
242 alldivs = dom.findall("div")
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
243 for div in alldivs:
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
244 dc = div.get('class')
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
245 # page content div
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
246 if dc == 'pageContent':
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
247 pagediv = div
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
248 break
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
249
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
250 # plain text mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
251 if textmode == "text":
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
252 # get full url assuming documentViewer is parent
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
253 selfurl = self.getLink()
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
254 if pagediv is not None:
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
255 links = pagediv.findall(".//a")
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
256 for l in links:
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
257 href = l.get('href')
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
258 if href and href.startswith('#note-'):
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
259 href = href.replace('#note-',"%s#note-"%selfurl)
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
260 l.set('href', href)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
261
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
262 return serialize(pagediv)
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
263
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
264 # text-with-links mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
265 elif textmode == "textPollux":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
266 if pagediv is not None:
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
267 viewerurl = docinfo['viewerUrl']
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
268 selfurl = self.getLink()
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
269 # check all a-tags
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
270 links = pagediv.findall(".//a")
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
271 for l in links:
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
272 href = l.get('href')
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
273
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
274 if href:
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
275 # is link with href
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
276 linkurl = urlparse.urlparse(href)
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
277 #logging.debug("getTextPage: linkurl=%s"%repr(linkurl))
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
278 if linkurl.path.endswith('GetDictionaryEntries'):
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
279 #TODO: replace wordInfo page
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
280 # is dictionary link - change href (keeping parameters)
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
281 #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
282 # add target to open new page
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
283 l.set('target', '_blank')
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
284
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
285 # TODO: is this needed?
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
286 # if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
287 # selfurl = self.absolute_url()
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
288 # l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
289 # l.set('target', '_blank')
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
290 # l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
291 # l.set('ondblclick', 'popupWin.focus();')
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
292
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
293 if href.startswith('#note-'):
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
294 # note link
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
295 l.set('href', href.replace('#note-',"%s#note-"%selfurl))
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
296
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
297 return serialize(pagediv)
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
298
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
299 # xml mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
300 elif textmode == "xml":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
301 if pagediv is not None:
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
302 return serialize(pagediv)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
303
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
304 # pureXml mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
305 elif textmode == "pureXml":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
306 if pagediv is not None:
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
307 return serialize(pagediv)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
308
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
309 # gis mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
310 elif textmode == "gis":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
311 if pagediv is not None:
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
312 # check all a-tags
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
313 links = pagediv.findall(".//a")
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
314 # add our URL as backlink
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
315 selfurl = self.getLink()
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
316 doc = base64.b64encode(selfurl)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
317 for l in links:
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
318 href = l.get('href')
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
319 if href:
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
320 if href.startswith('http://mappit.mpiwg-berlin.mpg.de'):
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
321 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
322 l.set('target', '_blank')
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
323
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
324 return serialize(pagediv)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
325
501
29c6d09a506c more cleanup.
casties
parents: 482
diff changeset
326 return None
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
327
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
328
509
9d05befdd462 try to get characterNormalization in search result working.
casties
parents: 508
diff changeset
def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
    """loads list of search results and stores XML in docinfo

    Asks doc-query.xql for the hits of query (sent as queryType=mode) in
    the document docinfo['textURLPath']. The hit count is stored in
    docinfo['resultSize'] and the serialized result div in
    docinfo['resultXML']. The mode/query pair is remembered in
    docinfo['cachedQuery'] so that repeating the same search returns
    immediately.

    mode -- search type; "none" returns docinfo untouched
    query -- query string
    pageinfo -- dict, only 'characterNormalization' is read (default 'reg')
    docinfo -- dict that is read and updated in place

    Returns docinfo.
    """
    logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
    if mode == "none":
        return docinfo

    queryKey = '%s_%s'%(mode,query)
    cachedQuery = docinfo.get('cachedQuery', None)
    if cachedQuery is not None:
        # cached search result
        if cachedQuery == queryKey:
            # same query
            return docinfo

        # different query: forget the old result. Both keys are only set
        # when the server answer contained the matching div, so they may
        # be missing even though cachedQuery is set.
        docinfo.pop('resultSize', None)
        docinfo.pop('resultXML', None)

    # cache query
    docinfo['cachedQuery'] = queryKey

    # pageinfo is optional in the signature
    if pageinfo is None:
        pageinfo = {}

    # fetch full results
    docpath = docinfo['textURLPath']
    params = {'document': docpath,
              'mode': 'text',
              'queryType': mode,
              'query': query,
              'queryResultPageSize': 1000,
              'queryResultPN': 1,
              'characterNormalization': pageinfo.get('characterNormalization', 'reg')}
    pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params))
    dom = ET.fromstring(pagexml)
    # page content is in <div class="queryResultPage">
    pagediv = None
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    for div in dom.findall("div"):
        dc = div.get('class')
        if dc == 'queryResultPage':
            # page content div
            pagediv = div

        elif dc == 'queryResultHits':
            # number of hits
            docinfo['resultSize'] = getInt(div.text)

    if pagediv is not None:
        # store XML in docinfo
        docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8')

    return docinfo
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
381
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
382
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """returns single page from the list of search results

    mode, query -- passed on to getSearchResults
    pn -- number of the result page (first page is 1), only used when
          start is not given
    start -- number of the first hit to show (first hit is 1)
    size -- hits per page (default pageinfo['resultPageSize'] or 10)
    pageinfo -- dict with page settings
    docinfo -- dict holding the (cached) search result

    Returns the serialized page or an error message string.
    """
    logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
    # get (cached) result
    self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)

    resultxml = docinfo.get('resultXML', None)
    if not resultxml:
        logging.error("getResultPage: unable to find resultXML")
        return "Error: no result!"

    if size is None:
        size = (pageinfo or {}).get('resultPageSize', 10)

    if start is None:
        # start counts from 1 (it is turned into an index below), so
        # page pn begins at hit (pn-1)*size + 1. Missing pn means page 1.
        start = ((pn or 1) - 1) * size + 1

    fullresult = ET.fromstring(resultxml)
    if fullresult is None:
        return "ERROR: no results!"

    # paginate: convert 1-based start into a 0-based child index.
    # Clamp at 0 because a negative index would count from the end
    # and throw away all hits but the last ones.
    first = max(start - 1, 0)
    del fullresult[:first]
    del fullresult[size:]
    tocdivs = fullresult

    # check all a-tags
    for l in tocdivs.findall(".//a"):
        href = l.get('href')
        if not href:
            continue

        # assume all links go to pages
        linkUrl = urlparse.urlparse(href)
        linkParams = urlparse.parse_qs(linkUrl.query)
        if 'pn' not in linkParams:
            # not a page link: leave it alone instead of failing the whole page
            logging.warning("getResultsPage: Problem with link=%s"%href)
            continue

        # take some parameters
        # NOTE(review): parse_qs returns a list of values per key, so
        # getLink receives lists here -- confirm that getLink expects that
        params = {'pn': linkParams['pn'],
                  'highlightQuery': linkParams.get('highlightQuery',''),
                  'highlightElement': linkParams.get('highlightElement',''),
                  'highlightElementPos': linkParams.get('highlightElementPos','')
                  }
        url = self.getLink(params=params)
        l.set('href', url)

    return serialize(tocdivs)
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
430
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
431
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getToc(self, mode="text", docinfo=None):
    """loads table of contents and stores XML in docinfo

    Asks doc-query.xql for the complete table of contents of the document
    docinfo['textURLPath']. The number of entries is stored in
    docinfo['tocSize_<mode>'] and the serialized result div in
    docinfo['tocXML_<mode>'].

    mode -- kind of table; "text" is sent to the server as queryType "toc",
            every other value is sent unchanged; "none" does nothing
    docinfo -- dict that is read and updated in place

    Returns docinfo.
    """
    logging.debug("getToc mode=%s"%mode)
    if mode == "none":
        return docinfo

    if 'tocSize_%s'%mode in docinfo:
        # cached toc
        return docinfo

    if mode == "text":
        queryType = "toc"
    else:
        queryType = mode

    # fetch full toc
    # we need to set a result set size
    params = {'document': docinfo['textURLPath'],
              'queryType': queryType,
              'queryResultPageSize': 1000,
              'queryResultPN': 1}
    # urlencode so that special characters in the path can not break the query string
    pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params))
    dom = ET.fromstring(pagexml)
    # page content is in <div class="queryResultPage">
    pagediv = None
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    for div in dom.findall("div"):
        dc = div.get('class')
        if dc == 'queryResultPage':
            # page content div
            pagediv = div

        elif dc == 'queryResultHits':
            # number of entries in toc
            docinfo['tocSize_%s'%mode] = getInt(div.text)

    if pagediv is not None:
        # store XML in docinfo
        docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')

    return docinfo
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
474
482
7ca8ac7db06e more new template stuff. more batching methods in documentViewer.
casties
parents: 478
diff changeset
def getTocPage(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """returns single page from the table of contents

    mode -- kind of table, passed on to getToc
    pn -- number of the TOC page (first page is 1), only used when start
          is not given
    start -- number of the first entry to show (first entry is 1)
    size -- entries per page (default pageinfo['tocPageSize'] or 30)
    pageinfo -- dict with page settings
    docinfo -- dict holding the (cached) table of contents

    Returns the serialized page or an error message string.
    """
    logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))
    tocKey = 'tocXML_%s'%mode

    # check for cached TOC
    if tocKey not in docinfo:
        self.getToc(mode=mode, docinfo=docinfo)

    tocxml = docinfo.get(tocKey, None)
    if not tocxml:
        logging.error("getTocPage: unable to find tocXML")
        return "Error: no table of contents!"

    if size is None:
        size = (pageinfo or {}).get('tocPageSize', 30)

    if start is None:
        # start counts from 1 (it is turned into an index below), so
        # page pn begins at entry (pn-1)*size + 1. Missing pn means page 1.
        start = ((pn or 1) - 1) * size + 1

    fulltoc = ET.fromstring(tocxml)
    if fulltoc is None:
        return "ERROR: no table of contents!"

    # paginate: every entry consists of two consecutive divs.
    # Clamp at 0 because a negative index would count from the end
    # and throw away all entries but the last ones.
    first = max((start - 1) * 2, 0)
    del fulltoc[:first]
    del fulltoc[size * 2:]
    tocdivs = fulltoc

    # check all a-tags
    for l in tocdivs.findall(".//a"):
        href = l.get('href')
        if not href:
            continue

        # take pn from href
        m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
        if m is not None:
            # and create new url (assuming parent is documentViewer)
            url = self.getLink('pn', m.group(1))
            l.set('href', url)
        else:
            logging.warning("getTocPage: Problem with link=%s"%href)

    # fix two-divs-per-row with containing div
    newtoc = ET.Element('div', {'class':'queryResultPage'})
    for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]):
        e = ET.Element('div',{'class':'tocline'})
        e.append(d1)
        e.append(d2)
        newtoc.append(e)

    return serialize(newtoc)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
533
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
534
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """store new title, serverUrl and timeout; redirects to manage_main if RESPONSE is given"""
    self.serverUrl = serverUrl
    self.timeout = timeout
    self.title = title
    if RESPONSE is None:
        # called without a response object: nothing to redirect
        return

    RESPONSE.redirect('manage_main')
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
542
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
543 # management methods
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def manage_addMpdlXmlTextServerForm(self):
    """renders the add form from the page template zpt/manage_addMpdlXmlTextServer"""
    # wrap the template in the context of self, then render it
    form = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    return form.__of__(self)()
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
548
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """creates a MpdlXmlTextServer and adds it under id to self.Destination(); redirects to manage_main if RESPONSE is given"""
    # alternative serverUrl with explicit port: http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    server = MpdlXmlTextServer(id,title,serverUrl,timeout)
    self.Destination()._setObject(id, server)
    if RESPONSE is None:
        # called without a response object: nothing to redirect
        return

    RESPONSE.redirect('manage_main')
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
556
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
557