Changeset 566:4a31608f8b0e in documentViewer for MpiwgXmlTextServer.py
- Timestamp: Oct 10, 2012, 4:09:49 PM (12 years ago)
- Branch: default
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
MpiwgXmlTextServer.py
r565 r566 307 307 logging.debug("pagediv: %s"%repr(pagediv)) 308 308 309 # plain text mode310 if textmode == "text" :309 # plain text or text-with-links mode 310 if textmode == "text" or textmode == "dict": 311 311 if pagediv is not None: 312 # handle pb-tag 313 self._extractPbTag(pagediv, pageinfo) 312 self._processPbTag(pagediv, pageinfo) 313 self._processFigures(pagediv, docinfo) 314 #self._fixEmptyDivs(pagediv) 314 315 # get full url assuming documentViewer is parent 315 316 selfurl = self.getLink() 316 if punditMode:317 self._addPunditAttributes(pagediv, pageinfo, docinfo)318 319 # fix empty div tags320 self._fixEmptyDivs(pagediv)321 317 # check all a-tags 322 318 links = pagediv.findall('.//a') 323 for l in links:324 href = l.get('href')325 # handle notes FIXME!326 if href and href.startswith('#note-'):327 href = href.replace('#note-',"%s#note-"%selfurl)328 l.set('href', href)329 330 return serialize(pagediv)331 332 # text-with-links mode333 elif textmode == "dict":334 if pagediv is not None:335 # handle pb-div336 self._extractPbTag(pagediv, pageinfo)337 viewerurl = docinfo['viewerUrl']338 selfurl = self.getLink()339 if punditMode:340 pagediv = self.addPunditAttributes(pagediv, pageinfo, docinfo)341 342 # fix empty div tags343 self._fixEmptyDivs(pagediv)344 # check all a-tags345 links = pagediv.findall(".//a")346 319 for l in links: 347 320 href = l.get('href') … … 349 322 # is link with href 350 323 linkurl = urlparse.urlparse(href) 351 #logging.debug("getTextPage: linkurl=%s"%repr(linkurl))352 324 if linkurl.path.endswith('GetDictionaryEntries'): 353 325 #TODO: replace wordInfo page … … 357 329 l.set('target', '_blank') 358 330 359 if href.startswith('#note-'):360 # note link 331 elif href.startswith('#note-'): 332 # note link FIXME! 
361 333 l.set('href', href.replace('#note-',"%s#note-"%selfurl)) 362 334 335 if punditMode: 336 self._addPunditAttributes(pagediv, pageinfo, docinfo) 337 363 338 return serialize(pagediv) 364 339 … … 373 348 return serialize(pagediv) 374 349 375 # gis mode 350 # gis mode FIXME! 376 351 elif textmode == "gis": 377 352 if pagediv is not None: … … 394 369 return None 395 370 396 def _ extractPbTag(self, pagediv, pageinfo):371 def _processPbTag(self, pagediv, pageinfo): 397 372 """extracts information from pb-tag and removes it from pagediv""" 398 373 pbdiv = pagediv.find(".//span[@class='pb']") … … 421 396 id = d.get('id') 422 397 if id: 398 # TODO: check path (cf RFC2396) 423 399 d.set('about', "http://echo.mpiwg-berlin.mpg.de/%s/pn=%s/#%s"%(textid,pn,id)) 424 400 cls = d.get('class','') … … 428 404 return pagediv 429 405 406 def _processFigures(self, pagediv, docinfo): 407 """processes figure-tags""" 408 divs = pagediv.findall(".//span[@class='figure']") 409 scalerUrl = docinfo['digilibScalerUrl'] 410 viewerUrl = docinfo['digilibViewerUrl'] 411 for d in divs: 412 try: 413 a = d.find('a') 414 img = a.find('img') 415 imgsrc = img.get('src') 416 imgurl = urlparse.urlparse(imgsrc) 417 imgq = imgurl.query 418 imgparams = urlparse.parse_qs(imgq) 419 fn = imgparams.get('fn', None) 420 if fn is not None: 421 # parse_qs puts parameters in lists 422 fn = fn[0] 423 # TODO: check valid path 424 # fix img@src 425 newsrc = '%s?fn=%s&dw=200&dh=200'%(scalerUrl,fn) 426 img.set('src', newsrc) 427 # fix a@href 428 newlink = '%s?fn=%s'%(viewerUrl,fn) 429 a.set('href', newlink) 430 a.set('target', '_blank') 431 432 except: 433 logging.warn("processFigures: strange figure!") 434 435 430 436 def _fixEmptyDivs(self, pagediv): 431 437 """fixes empty div-tags by inserting a space"""
Note: See TracChangeset for help on using the changeset viewer.