Context Navigation

← Previous Changeset
Next Changeset →

Changeset 610:0488cd12355b in documentViewer

Timestamp:

Jan 21, 2013, 6:58:21 PM (12 years ago)

Author:

casties

Branch:

default

Message:

gis mode works again.

Files:

2 edited

MpiwgXmlTextServer.py (modified) (16 diffs)
documentViewer.py (modified) (1 diff)

Legend:

Unmodified
Added
Removed

MpiwgXmlTextServer.py

-                      r609
+                      r610
 from SrvTxtUtils import getInt, getText, getHttpData
+# mapping of fields in the output of /mpiwg-mpdl-cms-web/query/GetDocInfo to documentViewer docinfo
+textinfoFieldMap = {
+                    'countPages' : 'numTextPages',
+                    'countFigures' : 'numFigureEntries',
+                    'countNotesHandwritten' : 'numHandwritten',
+                    'countNotes' : 'numNotes',
+                    'countPlaces' : 'numPlaces',
+                    'countTocEntries' : 'numTocEntries'
+                    }
 def serialize(node):
 …
     def getPlacesOnPage(self, docinfo=None, pn=None):
         """Returns list of GIS places of page pn"""
+        #FIXME!
+        logging.debug("getPlacesOnPage(pn=%s"%pn)
+        if not 'places' in docinfo:
+            self.getTextInfo('places', docinfo)
+        allplaces = docinfo.get('places', None)
+        if len(allplaces) == 0:
+            return []
+        # search for places on this page TODO: is there a better way?
+        places = [p for p in allplaces if p['pn'] == pn]
+        return places
+        """OLD:
         docpath = docinfo.get('textURLPath',None)
         if not docpath:
 …
             places.append(place)
         return places
+        return places"""
 …
         field = ''
         if mode in ['pages', 'toc', 'figures', 'notes', 'handwritten']:
+        if mode in ['pages', 'toc', 'figures', 'notes', 'handwritten', 'places']:
             # translate mode to field param
             if mode == 'handwritten':
 …
                 sys = doc.find('system')
                 if sys is not None:
+                    docinfo['numTextPages'] = getInt(getText(sys.find('countPages')))
+                    docinfo['numFigureEntries'] = getInt(getText(sys.find('countFigures')))
+                    docinfo['numHandwritten'] = getInt(getText(sys.find('countNotesHandwritten')))
+                    docinfo['numNotes'] = getInt(getText(sys.find('countNotes')))
+                    docinfo['numPlaces'] = getInt(getText(sys.find('countPlaces')))
+                    docinfo['numTocEntries'] = getInt(getText(sys.find('countTocEntries')))
+                    for (k,v) in textinfoFieldMap.items():
+                        # copy into docinfo (even if empty)
+                        docinfo[v] = getInt(getText(sys.find(k)))
             else:
 …
                 l = doc.find('list')
                 if l is not None:
+                    # look for general info
+                    for (k,v) in textinfoFieldMap.items():
+                        # copy into docinfo (only if not empty)
+                        s = doc.find(k)
+                        if s is not None:
+                            docinfo[v] = getInt(getText(s))
                     lt = l.get('type')
+                    #
                     # pageNumbers
+                    #
                     if lt == 'pages':
                         # contains tags with page numbers
 …
                         docinfo['pageNumbers'] = pages
+                    #
                     # toc
+                    #
                     elif lt in ['toc', 'figures', 'notes', 'notesHandwritten']:
                         # contains tags with table of contents/figures
 …
                         docinfo['full_%s'%mode] = tocs
+                    #
+                    # places
+                    #
+                                        #
+                    # toc
+                    #
+                    elif lt in ['places']:
+                        # contains tags with place-ids
+                        # <item id="N40004F-01"><ref>4</ref></item>
+                        places = []
+                        for p in l:
+                            if p.tag == 'item':
+                                place = {}
+                                place['id'] = p.get('id')
+                                ref = p.find('ref')
+                                place['pn'] = getInt(ref.text)
+                                places.append(place)
+                        docinfo['places'] = places
         return docinfo
 …
             logging.debug("getTextPage: more than one mode=%s"%mode)
+        # mode defaults
+        gisMode = False
+        punditMode = False
         # search mode
         if 'search' in modes:
 …
         # pundit mode
-        punditMode = False
         if 'pundit' in modes:
             punditMode = True
 …
             normMode = 'orig'
         elif 'gis' in modes:
+            #FIXME!
+            textmode = 'gis'
+            gisMode = True
+            # gis mode uses plain text
+            textmode = 'plain'
+            textParams['outputFormat'] = 'html'
         else:
             # text is default mode
 …
         # plain text or text-with-links mode
         if textmode == "plain" or textmode == "dict":
+        if textmode == 'plain' or textmode == 'dict':
             # the text is in div@class=text
             pagediv = dom.find(".//div[@class='text']")
 …
                     self._addPunditAttributes(pagediv, pageinfo, docinfo)
+                if gisMode:
+                    self._addGisTags(pagediv, pageinfo, docinfo)
                 s = serialize(pagediv)
                 logging.debug("getTextPage done in %s"%(datetime.now()-startTime))
 …
                 return serialize(pagediv)
-        # pureXml mode WTF?
-        elif textmode == "pureXml":
-            # the text is in body
-            pagediv = dom.find(".//body")
-            logging.debug("pagediv: %s"%repr(pagediv))
-            if pagediv is not None:
-                return serialize(pagediv)
-        # gis mode FIXME!
-        elif textmode == "gis":
-            # the text is in div@class=text
-            pagediv = dom.find(".//div[@class='text']")
-            logging.debug("pagediv: %s"%repr(pagediv))
-            if pagediv is not None:
-                # fix empty div tags
-                self._fixEmptyDivs(pagediv)
-                # check all a-tags
-                links = pagediv.findall(".//a")
-                # add our URL as backlink
-                selfurl = self.getLink()
-                doc = base64.b64encode(selfurl)
-                for l in links:
-                    href = l.get('href')
-                    if href:
-                        if href.startswith('http://mappit.mpiwg-berlin.mpg.de'):
-                            l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
-                            l.set('target', '_blank')
-                return serialize(pagediv)
         logging.error("getTextPage: error in text mode %s or in text!"%(textmode))
         return None
 …
     def _addPunditAttributes(self, pagediv, pageinfo, docinfo):
         """add about attributes for pundit annotation tool"""
+        """add about-attributes to divs for pundit annotation tool"""
         textid = docinfo.get('DRI', "fn=%s"%docinfo.get('documentPath', '???'))
         pn = pageinfo.get('pn', '1')
-        #  TODO: use pn as well?
         # check all div-tags
         divs = pagediv.findall(".//div")
 …
                 cls += ' pundit-content'
                 d.set('class', cls.strip())
+        return pagediv
+    def _addGisTags(self, pagediv, pageinfo, docinfo):
+        """add links for gis places"""
+        # use last part of documentPath as db-id
+        docpath = docinfo.get('documentPath', '')
+        textid = docpath.split('/')[-1]
+        # add our URL as backlink
+        selfurl = self.getLink()
+        doc = base64.b64encode(selfurl)
+        # check all span@class=place
+        spans = pagediv.findall(".//span[@class='place']")
+        for s in spans:
+            id = s.get('id')
+            if id:
+                # make links like http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/songy_tiang_zh_1637?id=N400061-02&doc=aHR...&format=gis
+                s.tag = 'a'
+                # TODO: make links configurable
+                url = "http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?id=%s&doc=%s&format=gis"%(textid,id,doc)
+                s.set('href', url)
+                s.set('target', '_blank')
         return pagediv

documentViewer.py

r609	r610
206	206
207	207	def getTextDownloadUrl(self, **args):
208		"""get ~~list of gis places on one page~~"""
	208	"""get URL to download the full text"""
209	209	return self.template.fulltextclient.getTextDownloadUrl(**args)
210	210

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 610:0488cd12355b in documentViewer

Legend:

MpiwgXmlTextServer.py

documentViewer.py

Download in other formats: