# HG changeset patch # User casties # Date 1350401680 -7200 # Node ID 4778900ae3e2ec3fa7a8a15c9b33f3b86339ee1b # Parent 3b53975be0c89e3b4612051357ed0735ee6b49d5 viewMode=xml works now diff -r 3b53975be0c8 -r 4778900ae3e2 MpiwgXmlTextServer.py --- a/MpiwgXmlTextServer.py Mon Oct 15 13:03:52 2012 +0200 +++ b/MpiwgXmlTextServer.py Tue Oct 16 17:34:40 2012 +0200 @@ -267,18 +267,11 @@ logging.error("Error reading page: %s"%e) return None - pagediv = None - body = dom.find('.//body') - if body is None: - logging.error("getTextPage: no body!") - return None - - # the text is in div@class=text - pagediv = body.find(".//div[@class='text']") - logging.debug("pagediv: %s"%repr(pagediv)) - # plain text or text-with-links mode if textmode == "text" or textmode == "dict": + # the text is in div@class=text + pagediv = dom.find(".//div[@class='text']") + logging.debug("pagediv: %s"%repr(pagediv)) if pagediv is not None: #self._processPbTag(pagediv, pageinfo) self._processFigures(pagediv, docinfo) @@ -310,16 +303,25 @@ # xml mode elif textmode == "xml": + # the text is in body + pagediv = dom.find(".//body") + logging.debug("pagediv: %s"%repr(pagediv)) if pagediv is not None: return serialize(pagediv) # pureXml mode WTF? elif textmode == "pureXml": + # the text is in body + pagediv = dom.find(".//body") + logging.debug("pagediv: %s"%repr(pagediv)) if pagediv is not None: return serialize(pagediv) # gis mode FIXME! elif textmode == "gis": + # the text is in div@class=text + pagediv = dom.find(".//div[@class='text']") + logging.debug("pagediv: %s"%repr(pagediv)) if pagediv is not None: # fix empty div tags self._fixEmptyDivs(pagediv) @@ -337,6 +339,7 @@ return serialize(pagediv) + logging.error("getTextPage: error in text mode %s or text!"%(textmode)) return None def _processPbTag(self, pagediv, pageinfo):