annotate MpdlXmlTextServer.py @ 529:f0e28d31ebc6

fixed bug with viewmode xml.
author casties
date Thu, 26 Apr 2012 14:45:05 +0200
parents 652cc8d3f1a9
children 5c7433c2515c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
1 from OFS.SimpleItem import SimpleItem
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
3
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
4 import xml.etree.ElementTree as ET
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
5
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
6 import re
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
7 import logging
134
6a33aa624ba4 fixed more oopsies
casties
parents: 133
diff changeset
8 import urllib
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
9 import urlparse
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
10 import base64
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
11
458
48b135b089c8 more renovation
casties
parents: 456
diff changeset
12 from SrvTxtUtils import getInt, getText, getHttpData
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
13
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def serialize(node):
    """Return node serialized as a UTF-8 encoded XML snippet.

    ElementTree prepends an XML declaration when an explicit encoding is
    requested. The declaration and any whitespace following it are removed
    so that the result can be embedded in a larger document.

    node -- the ElementTree element to serialize
    """
    s = ET.tostring(node, 'UTF-8')
    # byte literals: tostring() returns a byte string when given an encoding
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # drop the declaration and the line break(s) after it instead of
            # assuming that exactly one character follows the '?>'
            return s[end + 2:].lstrip()

    return s
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
23
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
24
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
25 class MpdlXmlTextServer(SimpleItem):
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
26 """TextServer implementation for MPDL-XML eXist server"""
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
27 meta_type="MPDL-XML TextServer"
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
28
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
29 manage_options=(
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
30 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
31 )+SimpleItem.manage_options
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
32
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
33 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
34
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Set up id, title, request timeout and the text server base URL.

    id -- object id
    title -- object title
    serverUrl -- complete base URL of the text server interface
    serverName -- host name; when given it overrides serverUrl and the
        base URL becomes "http://<serverName>/mpdl/interface/"
    timeout -- stored as self.timeout and passed on to getHttpData
    """
    self.id = id
    self.title = title
    self.timeout = timeout
    # an explicit host name wins over the serverUrl argument
    if serverName is not None:
        serverUrl = "http://%s/mpdl/interface/" % serverName

    self.serverUrl = serverUrl
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
44
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getHttpData(self, url, data=None):
    """Return the result of the HTTP request for url and data.

    This is a method of MpdlXmlTextServer; the call below goes to the
    getHttpData helper imported from SrvTxtUtils, using this object's
    timeout.
    """
    timeout = self.timeout
    return getHttpData(url, data, timeout=timeout)
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
48
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def getServerData(self, method, data=None):
    """Return the text server's answer for method and data.

    method -- name of the server script; it is appended to self.serverUrl
    data -- request data handed to getHttpData unchanged
    """
    return getHttpData(self.serverUrl + method, data, timeout=self.timeout)
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
53
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
54
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
def getPlacesOnPage(self, docinfo=None, pn=None):
    """Return the list of GIS places on page pn.

    docinfo -- document info dict; its 'textURLPath' entry names the document
    pn -- page number sent to the text server

    Returns a list of dicts {'id': ..., 'name': ...}, one for every <place>
    element found below a <resultPage> in the server response. Returns None
    when docinfo is missing or has no 'textURLPath'.
    """
    try:
        from urllib import urlencode  # Python 2
    except ImportError:
        from urllib.parse import urlencode  # Python 3

    # the default docinfo=None must not blow up on .get()
    if not docinfo:
        return None

    docpath = docinfo.get('textURLPath', None)
    if not docpath:
        return None

    # escape the parameter values (as getTextPage does with urlencode) so
    # that special characters in the document path cannot break the query
    query = urlencode([('document', docpath),
                       ('xpath', '//place'),
                       ('pn', pn)])
    text = self.getServerData("xpath.xql", query)
    dom = ET.fromstring(text)

    places = []
    for el in dom.findall(".//resultPage/place"):
        places.append({'id': el.get("id"), 'name': el.text})

    return places
307
ec5e920a61e6 *** empty log message ***
abukhman
parents: 306
diff changeset
72
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
73
517
aaacdf551f6f remove global info from processPageInfo.
casties
parents: 516
diff changeset
def getTextInfo(self, mode='', docinfo=None):
    """Read document info, including page concordance, from the text server.

    mode -- '' (general info), 'toc' or 'figures'; any other value is
        treated as ''
    docinfo -- document info dict; it is updated in place and returned.
        Its 'textURLPath' entry names the document on the server.

    Nothing is fetched when the requested info is already present:
    'full_<mode>' for toc/figures requests, 'numTextPages' otherwise.

    Keys written, depending on the tags in the server's <document> element:
    numTextPages, numFigureEntries, numTocEntries, numPlaces, pageNumbers
    (dict indexed by scan number) and full_<mode> (list of toc entries).
    """
    try:
        from urllib import urlencode  # Python 2
    except ImportError:
        from urllib.parse import urlencode  # Python 3

    logging.debug("getTextInfo mode=%s", mode)
    if mode not in ('toc', 'figures', ''):
        mode = ''

    # the default docinfo=None must not blow up on the lookups below
    if docinfo is None:
        docinfo = {}

    # check cached info
    if mode:
        # cached toc-request?
        if 'full_%s' % mode in docinfo:
            return docinfo
    elif 'numTextPages' in docinfo:
        # no toc-request
        return docinfo

    docpath = docinfo.get('textURLPath', None)
    if docpath is None:
        logging.error("getTextInfo: no textURLPath!")
        return docinfo

    # we need to set a result set size
    pagesize = 10000
    pn = 1
    # fetch docinfo; the values are escaped (as getTextPage does with
    # urlencode) so that special characters cannot break the query
    query = urlencode([('document', docpath),
                       ('info', mode),
                       ('pageSize', pagesize),
                       ('pn', pn)])
    pagexml = self.getServerData("doc-info.xql", query)
    dom = ET.fromstring(pagexml)
    # all info in tag <document>
    doc = dom.find("document")
    if doc is None:
        logging.error("getTextInfo: unable to find document-tag!")
        return docinfo

    # go through all child elements
    for tag in doc:
        name = tag.tag
        # numTextPages
        if name == 'countPages':
            numPages = getInt(tag.text)
            if numPages > 0:
                docinfo['numTextPages'] = numPages

        # numFigureEntries
        elif name == 'countFigureEntries':
            docinfo['numFigureEntries'] = getInt(tag.text)

        # numTocEntries
        elif name == 'countTocEntries':
            docinfo['numTocEntries'] = getInt(tag.text)

        # numPlaces
        elif name == 'countPlaces':
            docinfo['numPlaces'] = getInt(tag.text)

        # pageNumbers
        elif name == 'pageNumbers':
            # contains tags with page numbers
            # <pn><n>4</n><no>4</no><non/></pn>
            # n=scan number, no=original page no, non=normalized original page no
            # pageNumbers is a dict indexed by scan number
            pages = {}
            for pageTag in tag:
                page = {}
                n = 0
                for p in pageTag:
                    if p.tag == 'n':
                        n = getInt(p.text)
                        page['pn'] = n
                    elif p.tag in ('no', 'non'):
                        page[p.tag] = p.text

                # entries without a positive scan number are dropped
                if n > 0:
                    pages[n] = page

            docinfo['pageNumbers'] = pages

        # toc
        elif name == 'toc':
            # contains tags with table of contents/figures
            # <toc-entry><page>13</page><level>3</level><content>Chapter I</content><level-string>1.</level-string><real-level>1</real-level></toc-entry>
            tocs = []
            for entryTag in tag:
                toc = {}
                for t in entryTag:
                    if t.tag == 'page':
                        toc['pn'] = getInt(t.text)
                    elif t.tag in ('level', 'content', 'level-string', 'real-level'):
                        toc[t.tag] = t.text

                tocs.append(toc)

            # save as full_toc/full_figures
            docinfo['full_%s' % mode] = tocs

    return docinfo
7d7b639d7be7 add methods to use doc-info.xql.
casties
parents: 513
diff changeset
178
7d7b639d7be7 add methods to use doc-info.xql.
casties
parents: 513
diff changeset
179
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
def processPageInfo(self, dom, docinfo, pageinfo):
    """Copy page meta information from dom into pageinfo.

    The first <div> child of dom must have class "pageMeta"; otherwise an
    error is logged and nothing is stored. Each child of that div whose
    class is pageNumberOrig, pageNumberOrigNorm or pageHeaderTitle has its
    text stored in pageinfo under the class name.

    docinfo is accepted but not used here.
    """
    metaClasses = ('pageNumberOrig', 'pageNumberOrigNorm', 'pageHeaderTitle')
    # assume first second level div is pageMeta
    metadiv = dom.find("div")
    if metadiv is not None and metadiv.get('class', '') == 'pageMeta':
        for child in metadiv:
            cls = child.get('class')
            if cls in metaClasses:
                pageinfo[cls] = child.text
    else:
        logging.error("processPageInfo: pageMeta div not found!")

    return
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
206
388
0265ab93716a *** empty log message ***
abukhman
parents: 386
diff changeset
207
471
415a7026eeda split viewMode in viewMode and viewType
casties
parents: 469
diff changeset
208 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
209 """returns single page from fulltext"""
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
210
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
211 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
212 # check for cached text -- but ideally this shouldn't be called twice
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
213 if pageinfo.has_key('textPage'):
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
214 logging.debug("getTextPage: using cached text")
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
215 return pageinfo['textPage']
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
216
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
217 docpath = docinfo['textURLPath']
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
218 # just checking
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
219 if pageinfo['current'] != pn:
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
220 logging.warning("getTextPage: current!=pn!")
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
221
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
222 # stuff for constructing full urls
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
223 selfurl = docinfo['viewerUrl']
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
224 textParams = {'document': docpath,
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
225 'pn': pn}
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
226 if 'characterNormalization' in pageinfo:
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
227 textParams['characterNormalization'] = pageinfo['characterNormalization']
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
228
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
229 if not mode:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
230 # default is dict
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
231 mode = 'text'
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
232
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
233 modes = mode.split(',')
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
234 # check for multiple layers
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
235 if len(modes) > 1:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
236 logging.debug("getTextPage: more than one mode=%s"%mode)
527
652cc8d3f1a9 fixed bug with text when layer is only annotator.
casties
parents: 519
diff changeset
237
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
238 # search mode
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
239 if 'search' in modes:
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
240 # add highlighting
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
241 highlightQuery = pageinfo.get('highlightQuery', None)
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
242 if highlightQuery:
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
243 textParams['highlightQuery'] = highlightQuery
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
244 textParams['highlightElement'] = pageinfo.get('highlightElement', '')
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
245 textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '')
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
246
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
247 # ignore mode in the following
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
248 modes.remove('search')
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
249
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
250 # other modes don't combine
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
251 if 'dict' in modes:
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
252 # dict is called textPollux in the backend
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
253 textmode = 'textPollux'
529
f0e28d31ebc6 fixed bug with viewmode xml.
casties
parents: 527
diff changeset
254 elif 'xml' in modes:
f0e28d31ebc6 fixed bug with viewmode xml.
casties
parents: 527
diff changeset
255 # xml mode
f0e28d31ebc6 fixed bug with viewmode xml.
casties
parents: 527
diff changeset
256 textmode = 'xml'
f0e28d31ebc6 fixed bug with viewmode xml.
casties
parents: 527
diff changeset
257 textParams['characterNormalization'] = 'orig'
527
652cc8d3f1a9 fixed bug with text when layer is only annotator.
casties
parents: 519
diff changeset
258 else:
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
259 # text is default mode
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
260 textmode = 'text'
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
261
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
262 textParams['mode'] = textmode
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
263
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
264 # fetch the page
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
265 pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams))
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
266 dom = ET.fromstring(pagexml)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
267 # extract additional info
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
268 self.processPageInfo(dom, docinfo, pageinfo)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
269 # page content is in <div class="pageContent">
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
270 pagediv = None
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
271 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
469
15394486ab75 working with new templates
casties
parents: 465
diff changeset
272 # so we look at the second level divs
15394486ab75 working with new templates
casties
parents: 465
diff changeset
273 alldivs = dom.findall("div")
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
274 for div in alldivs:
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
275 dc = div.get('class')
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
276 # page content div
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
277 if dc == 'pageContent':
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
278 pagediv = div
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
279 break
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
280
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
281 # plain text mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
282 if textmode == "text":
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
283 # get full url assuming documentViewer is parent
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
284 selfurl = self.getLink()
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
285 if pagediv is not None:
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
286 links = pagediv.findall(".//a")
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
287 for l in links:
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
288 href = l.get('href')
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
289 if href and href.startswith('#note-'):
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
290 href = href.replace('#note-',"%s#note-"%selfurl)
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
291 l.set('href', href)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
292
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
293 return serialize(pagediv)
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
294
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
295 # text-with-links mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
296 elif textmode == "textPollux":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
297 if pagediv is not None:
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
298 viewerurl = docinfo['viewerUrl']
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
299 selfurl = self.getLink()
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
300 # check all a-tags
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
301 links = pagediv.findall(".//a")
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
302 for l in links:
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
303 href = l.get('href')
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
304
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
305 if href:
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
306 # is link with href
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
307 linkurl = urlparse.urlparse(href)
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
308 #logging.debug("getTextPage: linkurl=%s"%repr(linkurl))
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
309 if linkurl.path.endswith('GetDictionaryEntries'):
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
310 #TODO: replace wordInfo page
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
311 # is dictionary link - change href (keeping parameters)
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
312 #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
313 # add target to open new page
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
314 l.set('target', '_blank')
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
315
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
316 # TODO: is this needed?
511
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
317 # if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
318 # selfurl = self.absolute_url()
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
319 # l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
320 # l.set('target', '_blank')
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
321 # l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
551ca1641a5e more cleanup.
casties
parents: 510
diff changeset
322 # l.set('ondblclick', 'popupWin.focus();')
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
323
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
324 if href.startswith('#note-'):
478
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
325 # note link
cd37d6f8d5e8 more cleanup
casties
parents: 477
diff changeset
326 l.set('href', href.replace('#note-',"%s#note-"%selfurl))
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
327
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
328 return serialize(pagediv)
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
329
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
330 # xml mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
331 elif textmode == "xml":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
332 if pagediv is not None:
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
333 return serialize(pagediv)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
334
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
335 # pureXml mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
336 elif textmode == "pureXml":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
337 if pagediv is not None:
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
338 return serialize(pagediv)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
339
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
340 # gis mode
508
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
341 elif textmode == "gis":
473
74e9e74277e9 smaller improvements
casties
parents: 471
diff changeset
342 if pagediv is not None:
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
343 # check all a-tags
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
344 links = pagediv.findall(".//a")
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
345 # add our URL as backlink
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
346 selfurl = self.getLink()
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
347 doc = base64.b64encode(selfurl)
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
348 for l in links:
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
349 href = l.get('href')
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
350 if href:
506
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
351 if href.startswith('http://mappit.mpiwg-berlin.mpg.de'):
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
352 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
67014399894d cleaned out all 4suite code and weird methods.
casties
parents: 501
diff changeset
353 l.set('target', '_blank')
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
354
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
355 return serialize(pagediv)
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
356
501
29c6d09a506c more cleanup.
casties
parents: 482
diff changeset
357 return None
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
358
455
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
359
509
9d05befdd462 try to get characterNormalization in search result working.
casties
parents: 508
diff changeset
def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
    """Load the list of search results and store its XML in docinfo.

    The complete result list is requested from "doc-query.xql" and cached
    in docinfo: 'resultXML' holds the serialized <div class="queryResultPage">
    and 'resultSize' the number parsed from <div class="queryResultHits">.
    The cache is keyed by docinfo['cachedQuery'] ("<mode>_<query>"); calling
    again with the same mode and query returns without a server request.

    mode -- query type sent to the server; "none" means no search.
    query -- query string.
    pageinfo -- dict read for 'characterNormalization' (default 'reg');
        None is treated like an empty dict.
    docinfo -- dict read for 'textURLPath' and updated in place.

    Returns docinfo.
    """
    logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
    if mode == "none":
        return docinfo

    cacheKey = '%s_%s'%(mode,query)
    cachedQuery = docinfo.get('cachedQuery', None)
    if cachedQuery is not None:
        if cachedQuery == cacheKey:
            # same query: cached result is still valid
            return docinfo

        # different query: drop the stale result. The previous query may
        # not have stored both keys (no hits div / no result div in the
        # response), so remove them without insisting that they exist.
        docinfo.pop('resultSize', None)
        docinfo.pop('resultXML', None)

    # cache query
    docinfo['cachedQuery'] = cacheKey

    if pageinfo is None:
        pageinfo = {}

    # fetch full results
    docpath = docinfo['textURLPath']
    params = {'document': docpath,
              'mode': 'text',
              'queryType': mode,
              'query': query,
              'queryResultPageSize': 1000,
              'queryResultPN': 1,
              'characterNormalization': pageinfo.get('characterNormalization', 'reg')}
    pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params))
    dom = ET.fromstring(pagexml)
    # page content is in <div class="queryResultPage">
    pagediv = None
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    # so the direct child divs are inspected one by one
    for div in dom.findall("div"):
        dc = div.get('class')
        if dc == 'queryResultPage':
            # page content div
            pagediv = div

        elif dc == 'queryResultHits':
            docinfo['resultSize'] = getInt(div.text)

    if pagediv is not None:
        # store XML in docinfo
        docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8')

    return docinfo
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
412
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
413
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """Return a single page of the search results as serialized XML.

    The (cached) full result list is obtained through getSearchResults and
    read from docinfo['resultXML']; all hits outside the requested page are
    removed and the href of every link in the page is replaced by a link
    generated with self.getLink.

    mode, query -- passed on to getSearchResults.
    pn -- result page number (first page is 1); only used when start is None.
    start -- 1-based position of the first hit on the page.
    size -- hits per page; defaults to pageinfo['resultPageSize'] or 10.
    pageinfo -- dict with page parameters.
    docinfo -- dict holding the cached search result.

    Returns the serialized page, or "Error: no result!" when docinfo has no
    result XML.
    """
    logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
    # get (cached) result
    self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)

    resultxml = docinfo.get('resultXML', None)
    if not resultxml:
        logging.error("getResultPage: unable to find resultXML")
        return "Error: no result!"

    if size is None:
        size = pageinfo.get('resultPageSize', 10)

    if start is None:
        if pn is None:
            pn = 1

        # start is 1-based (see first below), so page pn begins at hit
        # (pn - 1) * size + 1. Without the +1 page 1 gave first == -1 and
        # the slice deletion removed everything but the last hit.
        start = (pn - 1) * size + 1

    resultPage = ET.fromstring(resultxml)

    # paginate: drop the hits before the page, then everything after it
    first = max(start - 1, 0)
    del resultPage[:first]
    del resultPage[size:]

    # check all a-tags
    for l in resultPage.findall(".//a"):
        href = l.get('href')
        if not href:
            continue

        # assume all links go to pages
        linkUrl = urlparse.urlparse(href)
        # NOTE: parse_qs returns a list of values for every key; the lists
        # are handed to getLink unchanged
        linkParams = urlparse.parse_qs(linkUrl.query)
        # take some parameters
        params = {'pn': linkParams['pn'],
                  'highlightQuery': linkParams.get('highlightQuery',''),
                  'highlightElement': linkParams.get('highlightElement',''),
                  'highlightElementPos': linkParams.get('highlightElementPos','')
                  }
        l.set('href', self.getLink(params=params))

    return serialize(resultPage)
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
461
d5a47f82e755 more cleanup.
casties
parents: 506
diff changeset
462
518
91051b36b9cc uses xml info from doc-info.xql for table of contents now.
casties
parents: 517
diff changeset
def getToc(self, mode='text', docinfo=None):
    """Return the full table-of-contents list for mode from docinfo.

    The list is stored in docinfo under 'full_<type>', where the type is
    'toc' for mode 'text' and the mode itself otherwise. When the entry is
    missing, self.getTextInfo is asked to provide an updated docinfo first.
    An empty list is returned if the entry is still missing afterwards.
    """
    logging.debug("getToc mode=%s"%mode)
    # mode 'text' is stored as type 'toc'; every other mode keeps its name
    tocType = 'toc' if mode == 'text' else mode
    tocKey = 'full_%s'%tocType

    if tocKey not in docinfo:
        # get new toc
        docinfo = self.getTextInfo(tocType, docinfo)

    return docinfo.get(tocKey, [])
91051b36b9cc uses xml info from doc-info.xql for table of contents now.
casties
parents: 517
diff changeset
476
91051b36b9cc uses xml info from doc-info.xql for table of contents now.
casties
parents: 517
diff changeset
def getTocPage(self, mode='text', pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """Return a single page of the table of contents as an HTML snippet.

    mode -- table-of-contents mode, passed to getToc.
    pn -- page number of the toc (first page is 1); only used when start
        is None.
    start -- 1-based position of the first entry on the page.
    size -- entries per page; defaults to pageinfo['tocPageSize'] or 30.
    pageinfo -- dict with page parameters.
    docinfo -- dict passed to getToc.

    Returns a <div> with one <div class="tocline"> per entry, or
    "Error: no table of contents!" when the table of contents is empty.
    """
    logging.debug("getTocPage mode=%s, pn=%s start=%s size=%s"%(mode,repr(pn),repr(start),repr(size)))
    fulltoc = self.getToc(mode=mode, docinfo=docinfo)
    if len(fulltoc) < 1:
        logging.error("getTocPage: unable to find toc!")
        return "Error: no table of contents!"

    if size is None:
        size = pageinfo.get('tocPageSize', 30)

    if start is None:
        if pn is None:
            pn = 1

        # start is 1-based (see first below), so page pn begins at entry
        # (pn - 1) * size + 1. Without the +1 page 1 produced the slice
        # fulltoc[-1:size-1] instead of the first size entries.
        start = (pn - 1) * size + 1

    # paginate
    first = max(start - 1, 0)
    last = first + size

    # NOTE(review): entry values are inserted into the markup as they are;
    # confirm that getToc delivers them already escaped for HTML.
    parts = ['<div>']
    for toc in fulltoc[first:last]:
        pageurl = self.getLink('pn', toc['pn'])
        parts.append('<div class="tocline">')
        parts.append('<div class="toc name">[%s %s]</div>'%(toc['level-string'], toc['content']))
        parts.append('<div class="toc float right page"><a href="%s">Page: %s</a></div>'%(pageurl, toc['pn']))
        parts.append('</div>\n')

    parts.append('</div>\n')
    return ''.join(parts)
519
9a3cc3732194 uses xml from doc-info.xql for table of contents now.
casties
parents: 518
diff changeset
506
129
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
507
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Store new title, timeout and serverUrl settings on this object.

    When a RESPONSE is given it is redirected to 'manage_main' afterwards.
    """
    self.title = title
    self.timeout = timeout
    self.serverUrl = serverUrl

    if RESPONSE is None:
        # no response object to redirect
        return

    RESPONSE.redirect('manage_main')
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
515
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
516 # management methods
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
def manage_addMpdlXmlTextServerForm(self):
    """Render the form for adding an MpdlXmlTextServer."""
    # load the page template and wrap it in the context of self
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    form = template.__of__(self)
    return form()
9404b6c37920 more modular version with separate object MpdlXmlTextServer
casties
parents:
diff changeset
521
453
beb7ccb92564 first version using elementtree instead of 4suite xml
casties
parents: 407
diff changeset
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create an MpdlXmlTextServer and add it to self.Destination().

    When a RESPONSE is given it is redirected to 'manage_main' afterwards.
    """
    # alternate serverUrl with explicit port:
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    server = MpdlXmlTextServer(id,title,serverUrl,timeout)
    self.Destination()._setObject(id, server)

    if RESPONSE is None:
        # no response object to redirect
        return

    RESPONSE.redirect('manage_main')
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
529
0a53fea83df7 more work renovating
casties
parents: 453
diff changeset
530