Mercurial > hg > documentViewer
comparison MpiwgXmlTextServer.py @ 579:fc861a6cef17
update in w-tag format.
author | casties |
---|---|
date | Fri, 26 Oct 2012 12:53:43 +0200 |
parents | 9251719154a3 |
children | ca0274423382 |
Comparison legend: equal | deleted | inserted | replaced
578:024b75162437 | 579:fc861a6cef17 |
---|---|
210 # TODO: change values in form | 210 # TODO: change values in form |
211 if normMode == 'regPlusNorm': | 211 if normMode == 'regPlusNorm': |
212 normMode = 'norm' | 212 normMode = 'norm' |
213 | 213 |
214 # TODO: this should not be necessary when the backend is fixed | 214 # TODO: this should not be necessary when the backend is fixed |
215 textParams['normalization'] = normMode | 215 #textParams['normalization'] = normMode |
216 | 216 |
217 if not mode: | 217 if not mode: |
218 # default is dict | 218 # default is dict |
219 mode = 'text' | 219 mode = 'text' |
220 | 220 |
271 # the text is in div@class=text | 271 # the text is in div@class=text |
272 pagediv = dom.find(".//div[@class='text']") | 272 pagediv = dom.find(".//div[@class='text']") |
273 logging.debug("pagediv: %s"%repr(pagediv)) | 273 logging.debug("pagediv: %s"%repr(pagediv)) |
274 if pagediv is not None: | 274 if pagediv is not None: |
275 # add textmode and normMode classes | 275 # add textmode and normMode classes |
276 pagediv.set('class', 'text %s %s'%(textmode, normMode)) | 276 #pagediv.set('class', 'text %s %s'%(textmode, normMode)) |
277 self._processWTags(textmode, normMode, pagediv) | 277 self._processWTags(textmode, normMode, pagediv) |
278 #self._processPbTag(pagediv, pageinfo) | 278 #self._processPbTag(pagediv, pageinfo) |
279 self._processFigures(pagediv, docinfo) | 279 self._processFigures(pagediv, docinfo) |
280 #self._fixEmptyDivs(pagediv) | 280 #self._fixEmptyDivs(pagediv) |
281 # get full url assuming documentViewer is parent | 281 # get full url assuming documentViewer is parent |
340 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) | 340 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) |
341 l.set('target', '_blank') | 341 l.set('target', '_blank') |
342 | 342 |
343 return serialize(pagediv) | 343 return serialize(pagediv) |
344 | 344 |
345 logging.error("getTextPage: error in text mode %s or text!"%(textmode)) | 345 logging.error("getTextPage: error in text mode %s or in text!"%(textmode)) |
346 return None | 346 return None |
347 | 347 |
348 def _processWTags(self, textMode, normMode, pagediv): | 348 def _processWTags(self, textMode, normMode, pagediv): |
349 """selects the necessary information from w-spans and removes the rest from pagediv""" | 349 """selects the necessary information from w-spans and removes the rest from pagediv""" |
350 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) | 350 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) |
355 # delete non-a-tags | 355 # delete non-a-tags |
356 wtag.remove(wtag.find("span[@class='nodictionary orig']")) | 356 wtag.remove(wtag.find("span[@class='nodictionary orig']")) |
357 wtag.remove(wtag.find("span[@class='nodictionary reg']")) | 357 wtag.remove(wtag.find("span[@class='nodictionary reg']")) |
358 wtag.remove(wtag.find("span[@class='nodictionary norm']")) | 358 wtag.remove(wtag.find("span[@class='nodictionary norm']")) |
359 # delete non-matching children of a-tag and suppress remaining tag name | 359 # delete non-matching children of a-tag and suppress remaining tag name |
360 atag = wtag.find("a[@class='dictionary']") | 360 atag = wtag.find("*[@class='dictionary']") |
361 if normMode == 'orig': | 361 if normMode == 'orig': |
362 atag.remove(atag.find("span[@class='reg']")) | 362 atag.remove(atag.find("span[@class='reg']")) |
363 atag.remove(atag.find("span[@class='norm']")) | 363 atag.remove(atag.find("span[@class='norm']")) |
364 atag.find("span[@class='orig']").tag = None | 364 atag.find("span[@class='orig']").tag = None |
365 elif normMode == 'reg': | 365 elif normMode == 'reg': |
371 atag.remove(atag.find("span[@class='reg']")) | 371 atag.remove(atag.find("span[@class='reg']")) |
372 atag.find("span[@class='norm']").tag = None | 372 atag.find("span[@class='norm']").tag = None |
373 | 373 |
374 else: | 374 else: |
375 # delete a-tag | 375 # delete a-tag |
376 wtag.remove(wtag.find("a[@class='dictionary']")) | 376 wtag.remove(wtag.find("*[@class='dictionary']")) |
377 # delete non-matching children and suppress remaining tag name | 377 # delete non-matching children and suppress remaining tag name |
378 if normMode == 'orig': | 378 if normMode == 'orig': |
379 wtag.remove(wtag.find("span[@class='nodictionary reg']")) | 379 wtag.remove(wtag.find("span[@class='nodictionary reg']")) |
380 wtag.remove(wtag.find("span[@class='nodictionary norm']")) | 380 wtag.remove(wtag.find("span[@class='nodictionary norm']")) |
381 wtag.find("span[@class='nodictionary orig']").tag = None | 381 wtag.find("span[@class='nodictionary orig']").tag = None |