comparison MpiwgXmlTextServer.py @ 579:fc861a6cef17

update in w-tag format.
author casties
date Fri, 26 Oct 2012 12:53:43 +0200
parents 9251719154a3
children ca0274423382
comparison
equal deleted inserted replaced
578:024b75162437 579:fc861a6cef17
210 # TODO: change values in form 210 # TODO: change values in form
211 if normMode == 'regPlusNorm': 211 if normMode == 'regPlusNorm':
212 normMode = 'norm' 212 normMode = 'norm'
213 213
214 # TODO: this should not be necessary when the backend is fixed 214 # TODO: this should not be necessary when the backend is fixed
215 textParams['normalization'] = normMode 215 #textParams['normalization'] = normMode
216 216
217 if not mode: 217 if not mode:
218 # default is dict 218 # default is dict
219 mode = 'text' 219 mode = 'text'
220 220
271 # the text is in div@class=text 271 # the text is in div@class=text
272 pagediv = dom.find(".//div[@class='text']") 272 pagediv = dom.find(".//div[@class='text']")
273 logging.debug("pagediv: %s"%repr(pagediv)) 273 logging.debug("pagediv: %s"%repr(pagediv))
274 if pagediv is not None: 274 if pagediv is not None:
275 # add textmode and normMode classes 275 # add textmode and normMode classes
276 pagediv.set('class', 'text %s %s'%(textmode, normMode)) 276 #pagediv.set('class', 'text %s %s'%(textmode, normMode))
277 self._processWTags(textmode, normMode, pagediv) 277 self._processWTags(textmode, normMode, pagediv)
278 #self._processPbTag(pagediv, pageinfo) 278 #self._processPbTag(pagediv, pageinfo)
279 self._processFigures(pagediv, docinfo) 279 self._processFigures(pagediv, docinfo)
280 #self._fixEmptyDivs(pagediv) 280 #self._fixEmptyDivs(pagediv)
281 # get full url assuming documentViewer is parent 281 # get full url assuming documentViewer is parent
340 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) 340 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
341 l.set('target', '_blank') 341 l.set('target', '_blank')
342 342
343 return serialize(pagediv) 343 return serialize(pagediv)
344 344
345 logging.error("getTextPage: error in text mode %s or text!"%(textmode)) 345 logging.error("getTextPage: error in text mode %s or in text!"%(textmode))
346 return None 346 return None
347 347
348 def _processWTags(self, textMode, normMode, pagediv): 348 def _processWTags(self, textMode, normMode, pagediv):
349 """selects the necessary information from w-spans and removes the rest from pagediv""" 349 """selects the necessary information from w-spans and removes the rest from pagediv"""
350 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) 350 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode)))
355 # delete non-a-tags 355 # delete non-a-tags
356 wtag.remove(wtag.find("span[@class='nodictionary orig']")) 356 wtag.remove(wtag.find("span[@class='nodictionary orig']"))
357 wtag.remove(wtag.find("span[@class='nodictionary reg']")) 357 wtag.remove(wtag.find("span[@class='nodictionary reg']"))
358 wtag.remove(wtag.find("span[@class='nodictionary norm']")) 358 wtag.remove(wtag.find("span[@class='nodictionary norm']"))
359 # delete non-matching children of a-tag and suppress remaining tag name 359 # delete non-matching children of a-tag and suppress remaining tag name
360 atag = wtag.find("a[@class='dictionary']") 360 atag = wtag.find("*[@class='dictionary']")
361 if normMode == 'orig': 361 if normMode == 'orig':
362 atag.remove(atag.find("span[@class='reg']")) 362 atag.remove(atag.find("span[@class='reg']"))
363 atag.remove(atag.find("span[@class='norm']")) 363 atag.remove(atag.find("span[@class='norm']"))
364 atag.find("span[@class='orig']").tag = None 364 atag.find("span[@class='orig']").tag = None
365 elif normMode == 'reg': 365 elif normMode == 'reg':
371 atag.remove(atag.find("span[@class='reg']")) 371 atag.remove(atag.find("span[@class='reg']"))
372 atag.find("span[@class='norm']").tag = None 372 atag.find("span[@class='norm']").tag = None
373 373
374 else: 374 else:
375 # delete a-tag 375 # delete a-tag
376 wtag.remove(wtag.find("a[@class='dictionary']")) 376 wtag.remove(wtag.find("*[@class='dictionary']"))
377 # delete non-matching children and suppress remaining tag name 377 # delete non-matching children and suppress remaining tag name
378 if normMode == 'orig': 378 if normMode == 'orig':
379 wtag.remove(wtag.find("span[@class='nodictionary reg']")) 379 wtag.remove(wtag.find("span[@class='nodictionary reg']"))
380 wtag.remove(wtag.find("span[@class='nodictionary norm']")) 380 wtag.remove(wtag.find("span[@class='nodictionary norm']"))
381 wtag.find("span[@class='nodictionary orig']").tag = None 381 wtag.find("span[@class='nodictionary orig']").tag = None