annotate MpdlXmlTextServer.py @ 513:67095296c95a

Merge from elementtree branch 92a6443a6f16ff25674d43814ec0d6c0a43a5e1a
author casties
date Tue, 28 Feb 2012 19:10:08 +0100
parents 91daab0c219b 551ca1641a5e
children 7d7b639d7be7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
1 from OFS.SimpleItem import SimpleItem
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
3
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
4 import xml.etree.ElementTree as ET
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
5
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
6 import re
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
7 import logging
134
6a33aa624ba4 fixed more oopsies
casties
parents: 133
diff changeset
8 import urllib
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
9 import urlparse
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
10 import base64
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
11
458
48b135b089c8 more renovation
casties
parents: 456
diff changeset
12 from SrvTxtUtils import getInt, getText, getHttpData
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
13
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def serialize(node):
    """Return the XML serialization of ``node`` without an XML declaration.

    ``ET.tostring`` is asked for UTF-8 output, so the result is an encoded
    byte string (plain ``str`` on Python 2).  If the output starts with an
    ``<?xml ...?>`` declaration, the declaration is removed together with
    the whitespace that follows it.

    :param node: ElementTree element to serialize.
    :returns: byte string containing the XML snippet.
    """
    s = ET.tostring(node, 'UTF-8')
    # byte literals keep these checks valid where tostring() returns bytes
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # drop the declaration and whatever line break follows it instead
            # of assuming that exactly one separator character comes next
            return s[end + 2:].lstrip()

    return s
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
23
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
24
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
25 class MpdlXmlTextServer(SimpleItem):
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
26 """TextServer implementation for MPDL-XML eXist server"""
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
27 meta_type="MPDL-XML TextServer"
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
28
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
29 manage_options=(
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
30 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
31 )+SimpleItem.manage_options
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
32
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
33 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
34
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Store id, title, request timeout and the base URL of the text server.

    When serverName is given the base URL is built from that host name and
    serverUrl is ignored; otherwise serverUrl is stored unchanged.
    """
    self.id = id
    self.title = title
    self.timeout = timeout

    if serverName is not None:
        # a host name takes precedence over an explicit URL
        baseUrl = "http://%s/mpdl/interface/"%serverName
    else:
        baseUrl = serverUrl

    self.serverUrl = baseUrl
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
44
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getHttpData(self, url, data=None):
    """Request url (with optional data) through the imported getHttpData
    helper, using this server's timeout, and return its result."""
    timeout = self.timeout
    return getHttpData(url, data, timeout=timeout)
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
48
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getServerData(self, method, data=None):
    """Return the text server's response for ``method`` called with ``data``.

    :param method: name of the server script; it is appended to
        ``self.serverUrl`` without any separator.
    :param data: optional request data, passed through unchanged.
    :returns: whatever ``self.getHttpData`` returns for the request.
    """
    url = self.serverUrl + method
    # go through our own getHttpData wrapper so that the timeout handling
    # lives in exactly one place
    return self.getHttpData(url, data)
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
53
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
54
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
def getPlacesOnPage(self, docinfo=None, pn=None):
    """Return the GIS places found on page ``pn`` of the document.

    :param docinfo: mapping describing the document; its ``textURLPath``
        entry is sent to the server as the document path.
    :param pn: page number sent to the server.
    :returns: list of ``{'id': ..., 'name': ...}`` dicts, one for each
        ``place`` element below ``resultPage`` in the server response, or
        ``None`` when there is no docinfo or no document path.
    """
    # docinfo defaults to None, so guard before calling .get() on it
    docpath = docinfo.get('textURLPath', None) if docinfo else None
    if not docpath:
        return None

    try:
        from urllib import urlencode  # Python 2
    except ImportError:
        from urllib.parse import urlencode

    # url-encode the values (as getTextPage does) so that a path containing
    # characters like '&' or spaces cannot break the query string;
    # a list of pairs keeps the parameter order stable
    query = urlencode([('document', docpath),
                       ('xpath', '//place'),
                       ('pn', pn)])
    text = self.getServerData("xpath.xql", query)
    dom = ET.fromstring(text)
    return [{'id': place.get("id"), 'name': place.text}
            for place in dom.findall(".//resultPage/place")]
307
ec5e920a61e6 *** empty log message ***
abukhman
parents: 306
diff changeset
72
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
73
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
def processPageInfo(self, dom, docinfo, pageinfo):
    """Copy page metadata from the pageMeta div of dom into docinfo and pageinfo.

    The first div child of dom has to carry class "pageMeta"; otherwise an
    error is logged and nothing is changed.  Each child div of that div is
    handled according to its class attribute.
    """
    # div class -> pageinfo key that receives the div's text
    textFields = {
        'pageNumberOrig': 'pageNumberOrig',
        'pageNumberOrigNorm': 'pageNumberOrigNorm',
        'pageHeaderTitle': 'pageHeaderTitle',
    }
    # div class -> docinfo key that receives getInt() of the div's text
    countFields = {
        'countFigureEntries': 'numFigureEntries',
        'countTocEntries': 'numTocEntries',
        'countPlaces': 'numPlaces',
    }

    # assume first second level div is pageMeta
    metadiv = dom.find("div")
    if metadiv is None or metadiv.get('class', '') != 'pageMeta':
        logging.error("processPageInfo: pageMeta div not found!")
        return

    for div in metadiv:
        dc = div.get('class')

        if dc in textFields:
            pageinfo[textFields[dc]] = div.text

        elif dc in countFields:
            docinfo[countFields[dc]] = getInt(div.text)

        elif dc == 'countPages':
            # numTextPages
            np = getInt(div.text)
            if np > 0:
                docinfo['numTextPages'] = np
                if docinfo.get('numPages', 0) == 0:
                    # seems to be text-only - update page count
                    # NOTE(review): the nesting of the numgroups update below
                    # is reconstructed from a listing without indentation --
                    # confirm against the repository
                    docinfo['numPages'] = np
                    groupsize = pageinfo['groupsize']
                    # number of page groups, rounded up
                    numgroups = int(np / groupsize)
                    if np % groupsize > 0:
                        numgroups += 1
                    pageinfo['numgroups'] = numgroups

    return
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
126
388
0265ab93716a *** empty log message ***
abukhman
parents: 386
diff changeset
127
471
415a7026eeda split viewMode in viewMode and viewType
casties
parents: 469
diff changeset
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):
    """Return the serialized content div of a single fulltext page.

    :param mode: comma-separated list of modes.  ``search`` only adds the
        highlighting parameters and can be combined with one other mode.
        Of the remaining modes ``dict`` wins (it is sent to the server as
        ``textPollux``), otherwise the first one is used.  No mode at all
        means ``text``.
    :param pn: number of the page to fetch.
    :param docinfo: document info mapping; ``textURLPath`` is read from it.
        Has to be given despite the ``None`` default.
    :param pageinfo: page info mapping; it is read for the cached page and
        the normalization and highlighting settings and is passed on to
        ``processPageInfo``.  Has to be given despite the ``None`` default.
    :returns: ``pageinfo['textPage']`` if that key exists; otherwise the
        result of ``serialize()`` for the ``pageContent`` div of the server
        response, or ``None`` if there is no such div or the mode is not
        one of text, dict/textPollux, xml, pureXml or gis.
    """
    logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
    # check for cached text -- but ideally this shouldn't be called twice
    if 'textPage' in pageinfo:
        logging.debug("getTextPage: using cached text")
        return pageinfo['textPage']

    docpath = docinfo['textURLPath']
    # just checking
    if pageinfo['current'] != pn:
        logging.warning("getTextPage: current!=pn!")

    # parameters of the page-fragment request
    textParams = {'document': docpath,
                  'pn': pn}
    if 'characterNormalization' in pageinfo:
        textParams['characterNormalization'] = pageinfo['characterNormalization']

    if not mode:
        # default is text
        mode = 'text'

    modes = mode.split(',')
    # check for multiple layers
    if len(modes) > 1:
        logging.debug("getTextPage: more than one mode=%s"%mode)

    # search mode
    if 'search' in modes:
        # add highlighting
        highlightQuery = pageinfo.get('highlightQuery', None)
        if highlightQuery:
            textParams['highlightQuery'] = highlightQuery
            textParams['highlightElement'] = pageinfo.get('highlightElement', '')
            textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '')

        # ignore mode in the following
        modes.remove('search')

    # other modes don't combine
    if 'dict' in modes:
        # dict is called textPollux in the backend
        textmode = 'textPollux'
    elif not modes:
        # text is default mode
        textmode = 'text'
    else:
        # just take first mode
        textmode = modes[0]

    textParams['mode'] = textmode

    # fetch the page
    pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams))
    dom = ET.fromstring(pagexml)
    # extract additional info
    self.processPageInfo(dom, docinfo, pageinfo)

    # page content is in <div class="pageContent">
    # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
    # so we look at the second level divs
    pagediv = None
    for div in dom.findall("div"):
        if div.get('class') == 'pageContent':
            pagediv = div
            break

    if pagediv is None:
        # nothing to rewrite or serialize in any mode
        return None

    if textmode == "text":
        # plain text mode
        # get full url assuming documentViewer is parent
        selfurl = self.getLink()
        for l in pagediv.findall(".//a"):
            href = l.get('href')
            if href and href.startswith('#note-'):
                # note link: prefix the fragment with our own URL
                l.set('href', href.replace('#note-',"%s#note-"%selfurl))

    elif textmode == "textPollux":
        # text-with-links mode
        selfurl = self.getLink()
        # check all a-tags
        for l in pagediv.findall(".//a"):
            href = l.get('href')
            if not href:
                continue

            # is link with href
            linkurl = urlparse.urlparse(href)
            if linkurl.path.endswith('GetDictionaryEntries'):
                #TODO: replace wordInfo page
                # is dictionary link - add target to open new page
                l.set('target', '_blank')

            if href.startswith('#note-'):
                # note link
                l.set('href', href.replace('#note-',"%s#note-"%selfurl))

    elif textmode == "gis":
        # gis mode
        # add our URL as backlink
        selfurl = self.getLink()
        doc = base64.b64encode(selfurl)
        # check all a-tags
        for l in pagediv.findall(".//a"):
            href = l.get('href')
            if href and href.startswith('http://mappit.mpiwg-berlin.mpg.de'):
                l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
                l.set('target', '_blank')

    elif textmode not in ("xml", "pureXml"):
        # unknown mode
        return None

    # xml and pureXml return the content div unmodified
    return serialize(pagediv)
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
277
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
278
509
9d05befdd462 try to get characterNormalization in search result working.
casties
parents: 508
diff changeset
def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
    """Load the full list of search results and store the XML in docinfo.

    mode is sent to the text server as 'queryType'; the special value
    "none" returns docinfo untouched. The result of the last query is
    cached in docinfo under 'cachedQuery' (key "<mode>_<query>"),
    'resultSize' and 'resultXML'.

    pageinfo may supply 'characterNormalization' (default 'reg').
    docinfo must contain 'textURLPath' (raises KeyError otherwise).

    Returns docinfo.
    """
    logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
    if mode == "none":
        return docinfo

    queryKey = '%s_%s'%(mode, query)
    cachedQuery = docinfo.get('cachedQuery', None)
    if cachedQuery is not None:
        if cachedQuery == queryKey:
            # same query: cached result is still valid
            return docinfo

        # different query: drop the stale result.
        # pop() instead of del because a previous query that yielded no
        # hit count or no result page never stored these keys.
        docinfo.pop('resultSize', None)
        docinfo.pop('resultXML', None)

    # cache query
    docinfo['cachedQuery'] = queryKey

    # pageinfo is optional in the signature
    if pageinfo is None:
        pageinfo = {}

    # fetch full results
    docpath = docinfo['textURLPath']
    params = {'document': docpath,
              'mode': 'text',
              'queryType': mode,
              'query': query,
              'queryResultPageSize': 1000,
              'queryResultPN': 1,
              'characterNormalization': pageinfo.get('characterNormalization', 'reg')}
    pagexml = self.getServerData("doc-query.xql", urllib.urlencode(params))
    dom = ET.fromstring(pagexml)
    # page content is in <div class="queryResultPage">
    pagediv = None
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    for div in dom.findall("div"):
        dc = div.get('class')
        if dc == 'queryResultPage':
            # page content div
            pagediv = div

        elif dc == 'queryResultHits':
            docinfo['resultSize'] = getInt(div.text)

    if pagediv is not None:
        # store XML in docinfo
        docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8')

    return docinfo
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
331
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
332
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """Return one page of the search result list as serialized XML.

    Uses docinfo['resultXML'] if present, otherwise loads it through
    getSearchResults(). The page is selected either by start (1-based
    position of the first hit) or, if start is None, by the 1-based page
    number pn. size defaults to pageinfo['resultPageSize'] or 10.

    All links with a 'pn' query parameter are rewritten with getLink().
    Returns an error message string if no result XML is available.
    """
    logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
    # check for cached result
    if 'resultXML' not in docinfo:
        self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)

    resultxml = docinfo.get('resultXML', None)
    if not resultxml:
        logging.error("getResultPage: unable to find resultXML")
        return "Error: no result!"

    if size is None:
        # pageinfo is optional in the signature
        size = (pageinfo or {}).get('resultPageSize', 10)

    if start is None:
        # start is 1-based (see first = start - 1 below), so page 1 has to
        # begin at 1; a missing pn means the first page.
        start = ((pn or 1) - 1) * size + 1

    # ET.fromstring either returns the root element or raises
    fullresult = ET.fromstring(resultxml)

    # paginate: keep size entries beginning at position start.
    # Clamp at 0 because a negative slice bound would count from the end.
    first = max(start - 1, 0)
    del fullresult[:first]
    del fullresult[size:]

    # check all a-tags
    for link in fullresult.findall(".//a"):
        href = link.get('href')
        if not href:
            continue

        # assume all links go to pages
        linkParams = urlparse.parse_qs(urlparse.urlparse(href).query)
        if 'pn' not in linkParams:
            # not a page link: leave it untouched instead of failing
            logging.warning("getResultsPage: Problem with link=%s"%href)
            continue

        # take some parameters
        params = {'pn': linkParams['pn'],
                  'highlightQuery': linkParams.get('highlightQuery',''),
                  'highlightElement': linkParams.get('highlightElement',''),
                  'highlightElementPos': linkParams.get('highlightElementPos','')
                  }
        link.set('href', self.getLink(params=params))

    return serialize(fullresult)
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
381
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
382
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getToc(self, mode="text", docinfo=None):
    """Load the table of contents and store the XML in docinfo.

    mode "none" returns docinfo untouched; mode "text" is sent to the
    text server as queryType "toc", any other mode is sent as is.
    The result is cached in docinfo under 'tocSize_<mode>' and
    'tocXML_<mode>'; if 'tocSize_<mode>' is already present nothing is
    fetched.

    docinfo must contain 'textURLPath' (raises KeyError otherwise).

    Returns docinfo.
    """
    logging.debug("getToc mode=%s"%mode)
    if mode == "none":
        return docinfo

    if 'tocSize_%s'%mode in docinfo:
        # cached toc
        return docinfo

    docpath = docinfo['textURLPath']
    queryType = "toc" if mode == "text" else mode
    # we need to set a result set size
    params = {'document': docpath,
              'queryType': queryType,
              'queryResultPageSize': 1000,
              'queryResultPN': 1}
    # fetch full toc; urlencode escapes the values like getSearchResults does
    pagexml = self.getServerData("doc-query.xql", urllib.urlencode(params))
    dom = ET.fromstring(pagexml)
    # page content is in <div class="queryResultPage">
    pagediv = None
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    for div in dom.findall("div"):
        dc = div.get('class')
        if dc == 'queryResultPage':
            # page content div
            pagediv = div

        elif dc == 'queryResultHits':
            # number of entries in toc
            docinfo['tocSize_%s'%mode] = getInt(div.text)

    if pagediv is not None:
        # store XML in docinfo
        docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')

    return docinfo
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
425
482
7ca8ac7db06e more new template stuff. more batching methods in documentViewer.
casties
parents: 478
diff changeset
def getTocPage(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """Return one page of the table of contents as serialized XML.

    Uses docinfo['tocXML_<mode>'] if present, otherwise loads it through
    getToc(). The page is selected either by start (1-based position of
    the first entry) or, if start is None, by the 1-based page number pn.
    size defaults to pageinfo['tocPageSize'] or 30.

    Links matching page-fragment.xql...pn=<n> are rewritten with
    getLink(); every two consecutive divs are wrapped in a
    <div class="tocline">.
    Returns an error message string if no toc XML is available.
    """
    logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))
    tocKey = 'tocXML_%s'%mode

    # check for cached TOC
    if tocKey not in docinfo:
        self.getToc(mode=mode, docinfo=docinfo)

    tocxml = docinfo.get(tocKey, None)
    if not tocxml:
        logging.error("getTocPage: unable to find tocXML")
        return "Error: no table of contents!"

    if size is None:
        # pageinfo is optional in the signature
        size = (pageinfo or {}).get('tocPageSize', 30)

    if start is None:
        # start is 1-based (see first = (start - 1) * 2 below), so page 1
        # has to begin at 1; a missing pn means the first page.
        start = ((pn or 1) - 1) * size + 1

    # ET.fromstring either returns the root element or raises
    fulltoc = ET.fromstring(tocxml)

    # paginate: every toc entry consists of two divs.
    # Clamp at 0 because a negative slice bound would count from the end.
    first = max((start - 1) * 2, 0)
    del fulltoc[:first]
    del fulltoc[size * 2:]

    # check all a-tags
    for link in fulltoc.findall(".//a"):
        href = link.get('href')
        if not href:
            continue

        # take pn from href
        m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
        if m is not None:
            # and create new url (assuming parent is documentViewer)
            link.set('href', self.getLink('pn', m.group(1)))
        else:
            logging.warning("getTocPage: Problem with link=%s"%href)

    # fix two-divs-per-row with containing div
    newtoc = ET.Element('div', {'class':'queryResultPage'})
    for (d1,d2) in zip(fulltoc[::2],fulltoc[1::2]):
        e = ET.Element('div',{'class':'tocline'})
        e.append(d1)
        e.append(d2)
        newtoc.append(e)

    return serialize(newtoc)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
484
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
485
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Store new title, server URL and timeout on this object.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    self.serverUrl = serverUrl
    self.timeout = timeout
    self.title = title
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
493
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
494 # management methods
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def manage_addMpdlXmlTextServerForm(self):
    """Render the form for adding an MpdlXmlTextServer."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # bind the template to this object before calling it
    return template.__of__(self)()
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
499
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create an MpdlXmlTextServer and add it to self.Destination().

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    # alternative serverUrl used before: http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    container = self.Destination()
    container._setObject(id, server)
    if RESPONSE is None:
        return

    RESPONSE.redirect('manage_main')
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
507
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
508