# HG changeset patch # User casties # Date 1350409613 -7200 # Node ID f0e5e9c6737fa192bab7c7b2cb200d80055327c1 # Parent 4778900ae3e2ec3fa7a8a15c9b33f3b86339ee1b new w-tag solution with css. (processWTags doesn't work) diff -r 4778900ae3e2 -r f0e5e9c6737f MpiwgXmlTextServer.py --- a/MpiwgXmlTextServer.py Tue Oct 16 17:34:40 2012 +0200 +++ b/MpiwgXmlTextServer.py Tue Oct 16 19:46:53 2012 +0200 @@ -204,14 +204,11 @@ textParams = {'docId': docpath, 'page': pn} - if 'characterNormalization' in pageinfo: - cn = pageinfo['characterNormalization'] - # TODO: change values in form - if cn == 'regPlusNorm': - cn = 'norm' - - textParams['normalization'] = cn - + normMode = pageinfo.get('characterNormalization', 'reg') + # TODO: change values in form + if normMode == 'regPlusNorm': + normMode = 'norm' + if not mode: # default is dict mode = 'text' @@ -249,13 +246,13 @@ textmode = 'xml' textParams['mode'] = 'untokenized' textParams['outputFormat'] = 'xmlDisplay' - textParams['normalization'] = 'orig' + textParams['normMode'] = 'orig' elif 'gis' in modes: #FIXME! textmode = 'gis' else: # text is default mode - textmode = 'text' + textmode = 'plain' textParams['mode'] = 'untokenized' textParams['outputFormat'] = 'html' @@ -268,11 +265,14 @@ return None # plain text or text-with-links mode - if textmode == "text" or textmode == "dict": + if textmode == "plain" or textmode == "dict": # the text is in div@class=text pagediv = dom.find(".//div[@class='text']") logging.debug("pagediv: %s"%repr(pagediv)) if pagediv is not None: + # add textmode and normMode classes + pagediv.set('class', 'text %s %s'%(textmode, normMode)) + #self._processWTags(textmode, normMode, pagediv) #self._processPbTag(pagediv, pageinfo) self._processFigures(pagediv, docinfo) #self._fixEmptyDivs(pagediv) @@ -342,6 +342,36 @@ logging.error("getTextPage: error in text mode %s or text!"%(textmode)) return None + def _processWTags(self, textMode, normMode, pagediv): + """selects the necessary information from w-spans and removes the rest from pagediv""" + logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode))) + wtags = pagediv.findall(".//span[@class='w']") + for wtag in wtags: + text = None + attr = None + if textMode == 'dict': + # take a-tag and matching child + attr = wtag.find('a').items() + text = wtag.find("a/span[@class='%s']"%normMode).text + else: + # take matching child + text = wtag.find("span[@class='nodictionary %s']"%normMode).text + + if text: + # replace wtag by new content + logging.debug("new w-tag attr=%s text=%s"%(attr,text)) + wtag.clear() + + if attr: + # make dictionary link + wtag.tag = 'a' + wtag.attrib.update(dict(attr)) + + # text content + wtag.text = text + + return pagediv + def _processPbTag(self, pagediv, pageinfo): """extracts information from pb-tag and removes it from pagediv""" pbdiv = pagediv.find(".//span[@class='pb']") diff -r 4778900ae3e2 -r f0e5e9c6737f css/docuviewer.css --- a/css/docuviewer.css Tue Oct 16 17:34:40 2012 +0200 +++ b/css/docuviewer.css Tue Oct 16 19:46:53 2012 +0200 @@ -268,6 +268,26 @@ margin-top: 0.5em; margin-bottom: 0.25em; } +/* normalization forms */ +div.col.main div.content.text div.text.orig span.w span.reg, +div.col.main div.content.text div.text.orig span.w span.norm { + display: none; +} +div.col.main div.content.text div.text.reg span.w span.orig, +div.col.main div.content.text div.text.reg span.w span.norm { + display: none; +} +div.col.main div.content.text div.text.norm span.w span.orig, +div.col.main div.content.text div.text.norm span.w span.reg { + display: none; +} +/* dictionary forms */ +div.col.main div.content.text div.text.plain span.w a.dictionary { + display: none; +} +div.col.main div.content.text div.text.dict span.w span.nodictionary { + display: none; +} /* page break */ div.col.main div.content.text span.pb span.n, div.col.main div.content.text span.pb span.o {