Mercurial > hg > documentViewer
comparison MpiwgXmlTextServer.py @ 579:fc861a6cef17
update in w-tag format.
| author | casties |
|---|---|
| date | Fri, 26 Oct 2012 12:53:43 +0200 |
| parents | 9251719154a3 |
| children | ca0274423382 |
comparison
Legend: equal · deleted · inserted · replaced
| 578:024b75162437 | 579:fc861a6cef17 |
|---|---|
| 210 # TODO: change values in form | 210 # TODO: change values in form |
| 211 if normMode == 'regPlusNorm': | 211 if normMode == 'regPlusNorm': |
| 212 normMode = 'norm' | 212 normMode = 'norm' |
| 213 | 213 |
| 214 # TODO: this should not be necessary when the backend is fixed | 214 # TODO: this should not be necessary when the backend is fixed |
| 215 textParams['normalization'] = normMode | 215 #textParams['normalization'] = normMode |
| 216 | 216 |
| 217 if not mode: | 217 if not mode: |
| 218 # default is dict | 218 # default is dict |
| 219 mode = 'text' | 219 mode = 'text' |
| 220 | 220 |
| 271 # the text is in div@class=text | 271 # the text is in div@class=text |
| 272 pagediv = dom.find(".//div[@class='text']") | 272 pagediv = dom.find(".//div[@class='text']") |
| 273 logging.debug("pagediv: %s"%repr(pagediv)) | 273 logging.debug("pagediv: %s"%repr(pagediv)) |
| 274 if pagediv is not None: | 274 if pagediv is not None: |
| 275 # add textmode and normMode classes | 275 # add textmode and normMode classes |
| 276 pagediv.set('class', 'text %s %s'%(textmode, normMode)) | 276 #pagediv.set('class', 'text %s %s'%(textmode, normMode)) |
| 277 self._processWTags(textmode, normMode, pagediv) | 277 self._processWTags(textmode, normMode, pagediv) |
| 278 #self._processPbTag(pagediv, pageinfo) | 278 #self._processPbTag(pagediv, pageinfo) |
| 279 self._processFigures(pagediv, docinfo) | 279 self._processFigures(pagediv, docinfo) |
| 280 #self._fixEmptyDivs(pagediv) | 280 #self._fixEmptyDivs(pagediv) |
| 281 # get full url assuming documentViewer is parent | 281 # get full url assuming documentViewer is parent |
| 340 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) | 340 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) |
| 341 l.set('target', '_blank') | 341 l.set('target', '_blank') |
| 342 | 342 |
| 343 return serialize(pagediv) | 343 return serialize(pagediv) |
| 344 | 344 |
| 345 logging.error("getTextPage: error in text mode %s or text!"%(textmode)) | 345 logging.error("getTextPage: error in text mode %s or in text!"%(textmode)) |
| 346 return None | 346 return None |
| 347 | 347 |
| 348 def _processWTags(self, textMode, normMode, pagediv): | 348 def _processWTags(self, textMode, normMode, pagediv): |
| 349 """selects the necessary information from w-spans and removes the rest from pagediv""" | 349 """selects the necessary information from w-spans and removes the rest from pagediv""" |
| 350 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) | 350 logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) |
| 355 # delete non-a-tags | 355 # delete non-a-tags |
| 356 wtag.remove(wtag.find("span[@class='nodictionary orig']")) | 356 wtag.remove(wtag.find("span[@class='nodictionary orig']")) |
| 357 wtag.remove(wtag.find("span[@class='nodictionary reg']")) | 357 wtag.remove(wtag.find("span[@class='nodictionary reg']")) |
| 358 wtag.remove(wtag.find("span[@class='nodictionary norm']")) | 358 wtag.remove(wtag.find("span[@class='nodictionary norm']")) |
| 359 # delete non-matching children of a-tag and suppress remaining tag name | 359 # delete non-matching children of a-tag and suppress remaining tag name |
| 360 atag = wtag.find("a[@class='dictionary']") | 360 atag = wtag.find("*[@class='dictionary']") |
| 361 if normMode == 'orig': | 361 if normMode == 'orig': |
| 362 atag.remove(atag.find("span[@class='reg']")) | 362 atag.remove(atag.find("span[@class='reg']")) |
| 363 atag.remove(atag.find("span[@class='norm']")) | 363 atag.remove(atag.find("span[@class='norm']")) |
| 364 atag.find("span[@class='orig']").tag = None | 364 atag.find("span[@class='orig']").tag = None |
| 365 elif normMode == 'reg': | 365 elif normMode == 'reg': |
| 371 atag.remove(atag.find("span[@class='reg']")) | 371 atag.remove(atag.find("span[@class='reg']")) |
| 372 atag.find("span[@class='norm']").tag = None | 372 atag.find("span[@class='norm']").tag = None |
| 373 | 373 |
| 374 else: | 374 else: |
| 375 # delete a-tag | 375 # delete a-tag |
| 376 wtag.remove(wtag.find("a[@class='dictionary']")) | 376 wtag.remove(wtag.find("*[@class='dictionary']")) |
| 377 # delete non-matching children and suppress remaining tag name | 377 # delete non-matching children and suppress remaining tag name |
| 378 if normMode == 'orig': | 378 if normMode == 'orig': |
| 379 wtag.remove(wtag.find("span[@class='nodictionary reg']")) | 379 wtag.remove(wtag.find("span[@class='nodictionary reg']")) |
| 380 wtag.remove(wtag.find("span[@class='nodictionary norm']")) | 380 wtag.remove(wtag.find("span[@class='nodictionary norm']")) |
| 381 wtag.find("span[@class='nodictionary orig']").tag = None | 381 wtag.find("span[@class='nodictionary orig']").tag = None |
