Changeset 508:d5a47f82e755 in documentViewer
- Timestamp: Feb 27, 2012, 8:26:52 PM (13 years ago)
- Branch: elementtree
- Files:
  - 2 deleted
  - 3 edited
Legend:
- Unmodified
- Added
- Removed
MpdlXmlTextServer.py
r506 r508 127 127 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): 128 128 """returns single page from fulltext""" 129 129 130 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) 130 131 # check for cached text -- but ideally this shouldn't be called twice … … 139 140 140 141 # stuff for constructing full urls 141 url = docinfo['url']142 urlmode = docinfo['mode']143 sn = pageinfo.get('sn', None)144 highlightQuery = pageinfo.get('highlightQuery', None)145 tocMode = pageinfo.get('tocMode', None)146 tocPN = pageinfo.get('tocPN',None)147 142 characterNormalization = pageinfo.get('characterNormalization', None) 148 143 moreTextParam = '' 149 144 selfurl = docinfo['viewerUrl'] 150 145 151 if mode == "dict" or mode == "text_dict": 146 if not mode: 147 # default is dict 148 mode = 'text' 149 150 modes = mode.split(',') 151 # check for multiple layers 152 if len(modes) > 1: 153 logging.debug("getTextPage: more than one mode=%s"%mode) 154 155 # search mode 156 if 'search' in modes: 157 # add highlighting 158 highlightQuery = pageinfo.get('highlightQuery', None) 159 sn = pageinfo.get('sn', None) 160 if highlightQuery and sn: 161 moreTextParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 162 163 # remove mode 164 modes.remove('search') 165 166 # other modes don't combine 167 if 'dict' in modes: 152 168 # dict is called textPollux in the backend 153 textmode = "textPollux" 154 elif not mode: 155 # default is text 156 mode = "text" 157 textmode = "text" 169 textmode = 'textPollux' 170 elif len(modes) == 0: 171 # text is default mode 172 textmode = 'text' 158 173 else: 159 textmode = mode 174 # just take first mode 175 textmode = modes[0] 160 176 161 177 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 162 if highlightQuery: 163 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 178 textParam += moreTextParam 164 179 165 180 # fetch the page … … 181 196 
182 197 # plain text mode 183 if mode == "text":198 if textmode == "text": 184 199 # get full url assuming documentViewer is parent 185 200 selfurl = self.getLink() … … 195 210 196 211 # text-with-links mode 197 elif mode == "dict":212 elif textmode == "textPollux": 198 213 if pagediv is not None: 199 214 viewerurl = docinfo['viewerUrl'] … … 227 242 228 243 # xml mode 229 elif mode == "xml":244 elif textmode == "xml": 230 245 if pagediv is not None: 231 246 return serialize(pagediv) 232 247 233 248 # pureXml mode 234 elif mode == "pureXml":249 elif textmode == "pureXml": 235 250 if pagediv is not None: 236 251 return serialize(pagediv) 237 252 238 253 # gis mode 239 elif mode == "gis":254 elif textmode == "gis": 240 255 if pagediv is not None: 241 256 # check all a-tags … … 255 270 return None 256 271 272 273 def getSearchResults(self, mode, query=None, docinfo=None): 274 """loads list of search results and stores XML in docinfo""" 275 logging.debug("getSearchResults mode=%s query=%s"%(mode, query)) 276 if mode == "none": 277 return docinfo 278 279 if 'resultSize_%s_%s'%(mode,query) in docinfo: 280 # cached result 281 return docinfo 282 283 docpath = docinfo['textURLPath'] 284 # we need to set a result set size 285 pagesize = 1000 286 pn = 1 287 # fetch full results 288 params = {'document': docpath, 289 'mode': 'text', 290 'queryType': mode, 291 'query': query, 292 'queryResultPageSize': 1000, 293 'queryResultPN': 1} 294 pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params)) 295 #pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&s=%s&viewMode=%s&characterNormalization=%s&highlightElementPos=%s&highlightElement=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, s, viewMode,characterNormalization, highlightElementPos, highlightElement, urllib.quote(highlightQuery))) 296 dom = ET.fromstring(pagexml) 297 # page content is in <div 
class="queryResultPage"> 298 pagediv = None 299 # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'] 300 alldivs = dom.findall("div") 301 for div in alldivs: 302 dc = div.get('class') 303 # page content div 304 if dc == 'queryResultPage': 305 pagediv = div 306 307 elif dc == 'queryResultHits': 308 docinfo['resultSize_%s_%s'%(mode,query)] = getInt(div.text) 309 310 if pagediv: 311 # store XML in docinfo 312 docinfo['resultXML_%s_%s'%(mode,query)] = ET.tostring(pagediv, 'UTF-8') 313 314 logging.debug("getSearchResults: pagediv=%s"%pagediv) 315 return docinfo 316 317 318 def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None): 319 """returns single page from the table of contents""" 320 logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) 321 # check for cached TOC 322 #TODO: cache only one search 323 if not docinfo.has_key('resultXML_%s_%s'%(mode,query)): 324 self.getSearchResults(mode=mode, query=query, docinfo=docinfo) 325 326 resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None) 327 if not resultxml: 328 logging.error("getResultPage: unable to find resultXML") 329 return "Error: no result!" 
330 331 if size is None: 332 size = pageinfo.get('searchResultPageSize', 20) 333 334 if start is None: 335 start = (pn - 1) * size 336 337 fullresult = ET.fromstring(resultxml) 338 339 if fullresult: 340 # paginate 341 first = start 342 len = size 343 del fullresult[:first] 344 del fullresult[len:] 345 tocdivs = fullresult 346 347 # check all a-tags 348 links = tocdivs.findall(".//a") 349 for l in links: 350 href = l.get('href') 351 if href: 352 # take pn from href 353 m = re.match(r'page-fragment\.xql.*pn=(\d+)', href) 354 if m is not None: 355 # and create new url (assuming parent is documentViewer) 356 #TODO: add highlighting params 357 url = self.getLink('pn', m.group(1)) 358 l.set('href', url) 359 else: 360 logging.warning("getResultPage: Problem with link=%s"%href) 361 362 # fix two-divs-per-row with containing div 363 # newtoc = ET.Element('div', {'class':'queryResultPage'}) 364 # for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]): 365 # e = ET.Element('div',{'class':'tocline'}) 366 # e.append(d1) 367 # e.append(d2) 368 # newtoc.append(e) 369 370 return serialize(tocdivs) 371 372 return "ERROR: no results!" 373 257 374 258 375 def getToc(self, mode="text", docinfo=None): -
documentViewer.py
r507 r508 175 175 return self.template.fulltextclient.getTextPage(**args) 176 176 177 def getSearchResults(self, **args): 178 """loads list of search results and stores XML in docinfo""" 179 return self.template.fulltextclient.getSearchResults(**args) 180 181 def getResultsPage(self, **args): 182 """returns one page of the search results""" 183 return self.template.fulltextclient.getResultsPage(**args) 184 177 185 def getToc(self, **args): 178 """ returns the full table of contents (in internal format)"""186 """loads table of contents and stores XML in docinfo""" 179 187 return self.template.fulltextclient.getToc(**args) 180 188 … … 248 256 if tocMode != "thumbs": 249 257 # get table of contents 250 docinfo =self.getToc(mode=tocMode, docinfo=docinfo)258 self.getToc(mode=tocMode, docinfo=docinfo) 251 259 252 260 # auto viewMode: text if there is a text else images … … 254 262 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 255 263 viewMode = "text" 256 viewLayer = "dict" 264 if viewLayer is None: 265 viewLayer = "dict" 257 266 else: 258 267 viewMode = "images" … … 263 272 viewLayer = "dict" 264 273 265 # stringify viewLayer266 if isinstance(viewLayer, list):267 logging.debug("index_html: viewLayer is list:%s"%viewLayer)268 viewLayer = ','.join([t for t in viewLayer if t])269 270 274 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode) 271 275 … … 675 679 pageinfo = {} 676 680 pageinfo['viewMode'] = viewMode 681 # split viewLayer if necessary 682 if isinstance(viewLayer,basestring): 683 viewLayer = viewLayer.split(',') 684 685 if isinstance(viewLayer, list): 686 logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer) 687 # save (unique) list in viewLayers 688 seen = set() 689 viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)] 690 pageinfo['viewLayers'] = viewLayers 691 # stringify viewLayer 692 viewLayer = ','.join(viewLayers) 693 else: 694 
#create list 695 pageinfo['viewLayers'] = [viewLayer] 696 677 697 pageinfo['viewLayer'] = viewLayer 678 698 pageinfo['tocMode'] = tocMode -
zpt/viewer_text.zpt
r506 r508 3 3 <html xmlns="http://www.w3.org/1999/xhtml" 4 4 tal:define="docinfo options/docinfo; pageinfo options/pageinfo; viewMode pageinfo/viewMode; 5 tocMode pageinfo/tocMode; viewLayer pageinfo/viewLayer; viewerUrl docinfo/viewerUrl; 5 tocMode pageinfo/tocMode; viewLayer pageinfo/viewLayer; viewLayers pageinfo/viewLayers; 6 viewerUrl docinfo/viewerUrl; 6 7 rootUrl here/getDocumentViewerURL; 7 8 numPages docinfo/numPages | nothing;"> … … 29 30 pn pageinfo/pn; 30 31 flowLtr python:pageinfo.get('pageFlow','ltr')!='rtl'; 32 query python:request.get('query', None); 33 queryType python:request.get('queryType','fulltextMorph'); 31 34 textPage python:here.getTextPage(mode=viewLayer, pn=pn, docinfo=docinfo, pageinfo=pageinfo) or '[no text here]';"> 32 35 <!-- header --> … … 58 61 </div> 59 62 </div> 60 <!-- col-main -->63 <!-- end of col-main --> 61 64 62 65 <!-- right-side options --> … … 65 68 <div class="options"> 66 69 <h4>Text display</h4> 70 <pre tal:content="python:repr(viewLayers)" /> 67 71 <form tal:attributes="action viewerUrl" class="autosubmit"> 68 72 <input type="hidden" 69 73 tal:define="params python:here.getParams(params={'viewLayer':None,'viewMode':None})" 70 74 tal:repeat="param params" 71 tal:attributes="name param; value python:params[param]" /> <input 72 class="autosubmit" type="radio" name="viewMode" value="text" 73 tal:attributes="checked python:viewMode=='text'" /> Text<br /> <span 74 class="optionsText"> <input type="checkbox" 75 class="autosubmit" name="viewLayer" value="dict" 76 tal:attributes="checked python:viewLayer=='dict'" /> Dictionary<br /> 77 </span> <span class="optionsText" tal:condition="python:docinfo.get('numPlaces',0)"> 78 <input type="checkbox" class="autosubmit" name="viewLayer" 79 value="gis" tal:attributes="checked python:viewLayer=='gis'" /> Places<br /> 80 </span> <input type="radio" class="autosubmit" name="viewMode" value="xml" 81 tal:attributes="checked python:viewMode=='xml'" /> XML<br /> <input 82 type="submit" 
value="Go!" /> 75 tal:attributes="name param; value python:params[param]" /> 76 <ul> 77 <li> 78 <input class="autosubmit" type="radio" name="viewMode" value="text" 79 tal:attributes="checked python:viewMode=='text'" /> Text 80 <ul> 81 <li> 82 <input type="checkbox" class="autosubmit" name="viewLayer" 83 value="dict" tal:attributes="checked python:'dict' in viewLayers" /> 84 Dictionary 85 </li> 86 <li tal:condition="query"> 87 <input type="checkbox" class="autosubmit" name="viewLayer" 88 value="search" 89 tal:attributes="checked python:'search' in viewLayers" /> Search hits 90 </li> 91 <li tal:condition="python:docinfo.get('numPlaces',0)"> 92 <input type="checkbox" class="autosubmit" name="viewLayer" value="gis" 93 tal:attributes="checked python:'gis' in viewLayers" /> Places<br /> 94 </li> 95 </ul> 96 </li> 97 <li> 98 <input type="radio" class="autosubmit" name="viewMode" value="xml" 99 tal:attributes="checked python:viewMode=='xml'" /> XML<br /> <input 100 type="submit" value="Go!" /> 101 </li> 102 </ul> 83 103 </form> 84 104 </div> 85 105 <!--"END TEXT DISPLAY"--> 86 106 107 <!--"BEGIN SEARCH"--> 108 <div class="options"> 109 <h4>Search</h4> 110 <form tal:attributes="action viewerUrl"> 111 <input type="hidden" 112 tal:define="params python:here.getParams(params={'query':None,'queryType':None,'viewLayer':None})" 113 tal:repeat="param params" 114 tal:attributes="name param; value python:params[param]" /> 115 <!-- make sure we have one viewLayer=search --> 116 <tal:block tal:repeat="vl viewLayers"> 117 <input type="hidden" name="viewLayer" tal:attributes="value vl" tal:condition="python:vl != 'search'"/> 118 </tal:block> 119 <input type="hidden" name="viewLayer" value="search"/> 120 <!-- query text --> 121 <input type="text" name="query" tal:attributes="value query"/> 122 <input type="submit" value="Search"/> 123 <ul> 124 <li> 125 <input type="radio" name="queryType" value="fulltext" 126 tal:attributes="checked python:queryType=='fulltext'"/> Exact 127 </li> 128 
<li> 129 <input type="radio" name="queryType" value="fulltextMorph" 130 tal:attributes="checked python:queryType=='fulltextMorph'"/> All forms 131 </li> 132 <li> 133 <input type="radio" name="queryType" value="ftIndex" 134 tal:attributes="checked python:queryType=='ftIndex'"/> Fulltext index 135 </li> 136 <li> 137 <input type="radio" name="queryType" value="ftIndexMorph" 138 tal:attributes="checked python:queryType=='ftIndexMorph'"/> Morphological index 139 </li> 140 </ul> 141 </form> 142 </div> 143 <!--"END SEARCH"--> 144 87 145 <!--"BEGIN TEXT SIZE"--> 88 146 <div class="options"> 89 147 <h4>Text size</h4> 90 <div class="fsizer"> 91 <a href="javascript:fontSize(12);" class="fs_sml">S</a> <a 92 href="javascript:fontSize(14);" class="fs_med">M</a> <a 93 href="javascript:fontSize(16);" class="fs_lrg">L</a> 94 </div> 148 <ul class="fsizer"> 149 <li> 150 <a href="javascript:fontSize(12);" class="fs_sml">S</a> 151 </li> 152 <li> 153 <a href="javascript:fontSize(14);" class="fs_med">M</a> 154 </li> 155 <li> 156 <a href="javascript:fontSize(16);" class="fs_lrg">L</a> 157 </li> 158 </ul> 95 159 </div> 96 160 <!--"END TEXT SIZE"--> 97 161 98 162 <!--"BEGIN DICTIONARY OVERVIEW"--> 99 <div class="options" tal:condition="python: viewLayer=='dict'">163 <div class="options" tal:condition="python:'dict' in viewLayers"> 100 164 <h4>Dictionary view</h4> 101 165 <form name="f3" action=""> 102 <input type="radio" name="r3" />Tab<br /> <input type="radio" name="r3" />Window<br /> 166 <ul> 167 <li> 168 <input type="radio" name="r3" /> Tab 169 </li> 170 <li> 171 <input type="radio" name="r3" /> Window 172 </li> 173 </ul> 103 174 </form> 104 175 </div> … … 106 177 107 178 <!--"BEGIN TEXT NORMALIZATION"--> 108 <div class="options" tal:condition="python:viewLayer!='xml'">179 <div class="options"> 109 180 <h4>Text normalization</h4> 110 181 <form tal:attributes="action viewerUrl" class="autosubmit" … … 113 184 tal:define="params python:here.getParams('characterNormalization',None)" 114 185 
tal:repeat="param params" 115 tal:attributes="name param; value python:params[param]" /> <input 116 type="radio" class="autosubmit" name="characterNormalization" value="orig" 117 tal:attributes="checked python:norm=='orig'" /> Original<br /> <input 118 type="radio" class="autosubmit" name="characterNormalization" value="reg" 119 tal:attributes="checked python:norm=='reg'" /> Regularized<br /> <input 120 type="radio" class="autosubmit" name="characterNormalization" 121 value="regPlusNorm" tal:attributes="checked python:norm=='regPlusNorm'" /> 122 Normalized<br /> <input type="submit" value="Go!" /> 186 tal:attributes="name param; value python:params[param]" /> 187 <ul> 188 <li> 189 <input type="radio" class="autosubmit" name="characterNormalization" 190 value="orig" tal:attributes="checked python:norm=='orig'" /> Original 191 </li> 192 <li> 193 <input type="radio" class="autosubmit" name="characterNormalization" 194 value="reg" tal:attributes="checked python:norm=='reg'" /> Regularized 195 </li> 196 <li> 197 <input type="radio" class="autosubmit" name="characterNormalization" 198 value="regPlusNorm" tal:attributes="checked python:norm=='regPlusNorm'" /> 199 Normalized 200 </li> 201 </ul> 202 <input type="submit" value="Go!" /> 123 203 </form> 124 204 </div> … … 126 206 127 207 <!--"BEGIN PLACES"--> 128 <div class="options" tal:condition="python: viewLayer=='gis'">208 <div class="options" tal:condition="python:'gis' in viewLayers"> 129 209 <tal:block tal:define=" 130 210 name docinfo/documentName; … … 148 228 </div> 149 229 <!-- /col-right --> 230 231 <!-- right-side options --> 232 <div class="col-veryright" tal:condition="query"> 233 <!--"BEGIN SEARCH RESULTS" --> 234 <div class="options"> 235 <h4>Search results</h4> 236 <div tal:content="structure python:here.getResultsPage(mode=queryType,query=query,pn=1,pageinfo=pageinfo,docinfo=docinfo)"/> 237 </div> 238 </div> 150 239 151 240 </div>
Note: See TracChangeset for help on using the changeset viewer.