# HG changeset patch
# User casties
# Date 1358794701 -3600
# Node ID 0488cd12355bf5df56b30456b76cc229cac91a5d
# Parent 7962e6891d99444615b62695ca066b51e78e74fc
gis mode works again.
diff -r 7962e6891d99 -r 0488cd12355b MpiwgXmlTextServer.py
--- a/MpiwgXmlTextServer.py Tue Jan 15 18:15:32 2013 +0100
+++ b/MpiwgXmlTextServer.py Mon Jan 21 19:58:21 2013 +0100
@@ -13,6 +13,16 @@
from SrvTxtUtils import getInt, getText, getHttpData
+# maps tag names in the output of /mpiwg-mpdl-cms-web/query/GetDocInfo to the documentViewer docinfo keys they are copied into
+textinfoFieldMap = {
+ 'countPages' : 'numTextPages',
+ 'countFigures' : 'numFigureEntries',
+ 'countNotesHandwritten' : 'numHandwritten',
+ 'countNotes' : 'numNotes',
+ 'countPlaces' : 'numPlaces',
+ 'countTocEntries' : 'numTocEntries'
+ }
+
def serialize(node):
"""returns a string containing an XML snippet of node"""
s = ET.tostring(node, 'UTF-8')
@@ -72,7 +82,18 @@
def getPlacesOnPage(self, docinfo=None, pn=None):
"""Returns list of GIS places of page pn"""
- #FIXME!
+ logging.debug("getPlacesOnPage(pn=%s"%pn)
+ if 'places' not in docinfo:
+ self.getTextInfo('places', docinfo)
+
+ # 'places' can still be missing or None when getTextInfo stored no list
+ allplaces = docinfo.get('places')
+ if not allplaces:
+ return []
+ # search for places on this page TODO: is there a better way?
+ places = [p for p in allplaces if p['pn'] == pn]
+ return places
+ """OLD:
docpath = docinfo.get('textURLPath',None)
if not docpath:
return None
@@ -87,7 +108,7 @@
place = {'id': id, 'name': name}
places.append(place)
- return places
+ return places"""
def getTextInfo(self, mode=None, docinfo=None):
@@ -95,7 +116,7 @@
logging.debug("getTextInfo mode=%s"%mode)
field = ''
- if mode in ['pages', 'toc', 'figures', 'notes', 'handwritten']:
+ if mode in ['pages', 'toc', 'figures', 'notes', 'handwritten', 'places']:
# translate mode to field param
if mode == 'handwritten':
field = '&field=notesHandwritten'
@@ -132,19 +153,25 @@
# get general info from system-tag
sys = doc.find('system')
if sys is not None:
- docinfo['numTextPages'] = getInt(getText(sys.find('countPages')))
- docinfo['numFigureEntries'] = getInt(getText(sys.find('countFigures')))
- docinfo['numHandwritten'] = getInt(getText(sys.find('countNotesHandwritten')))
- docinfo['numNotes'] = getInt(getText(sys.find('countNotes')))
- docinfo['numPlaces'] = getInt(getText(sys.find('countPlaces')))
- docinfo['numTocEntries'] = getInt(getText(sys.find('countTocEntries')))
+ for (k,v) in textinfoFieldMap.items():
+ # copy into docinfo (even if empty)
+ docinfo[v] = getInt(getText(sys.find(k)))
else:
# result is in list-tag
l = doc.find('list')
if l is not None:
+ # look for general info
+ for (k,v) in textinfoFieldMap.items():
+ # copy into docinfo (only if not empty)
+ s = doc.find(k)
+ if s is not None:
+ docinfo[v] = getInt(getText(s))
+
lt = l.get('type')
+ #
# pageNumbers
+ #
if lt == 'pages':
# contains tags with page numbers
#
@@ -164,8 +191,10 @@
pages[pn] = page
docinfo['pageNumbers'] = pages
-
+
+ #
# toc
+ #
elif lt in ['toc', 'figures', 'notes', 'notesHandwritten']:
# contains tags with table of contents/figures
# - CAP.I.
[132]
@@ -185,6 +214,26 @@
# save as full_toc/full_figures
docinfo['full_%s'%mode] = tocs
+ #
+ # places
+ #
+ # (each entry gets the place id and the number
+ # of the page it occurs on)
+ #
+ elif lt in ['places']:
+ # contains item-tags with place-ids and a ref-tag holding the page number
+ # e.g. <item id="N400061-02"><ref>4</ref></item>
+ places = []
+ for p in l:
+ if p.tag == 'item':
+ place = {}
+ place['id'] = p.get('id')
+ ref = p.find('ref')
+ place['pn'] = getInt(ref.text)
+ places.append(place)
+
+ docinfo['places'] = places
+
return docinfo
@@ -224,6 +273,10 @@
if len(modes) > 1:
logging.debug("getTextPage: more than one mode=%s"%mode)
+ # mode defaults
+ gisMode = False
+ punditMode = False
+
# search mode
if 'search' in modes:
# add highlighting
@@ -237,7 +290,6 @@
modes.remove('search')
# pundit mode
- punditMode = False
if 'pundit' in modes:
punditMode = True
# ignore mode in the following
@@ -252,8 +304,10 @@
textParams['outputFormat'] = 'xmlDisplay'
normMode = 'orig'
elif 'gis' in modes:
- #FIXME!
- textmode = 'gis'
+ gisMode = True
+ # gis mode uses plain text
+ textmode = 'plain'
+ textParams['outputFormat'] = 'html'
else:
# text is default mode
textmode = 'plain'
@@ -268,7 +322,7 @@
return None
# plain text or text-with-links mode
- if textmode == "plain" or textmode == "dict":
+ if textmode == 'plain' or textmode == 'dict':
# the text is in div@class=text
pagediv = dom.find(".//div[@class='text']")
logging.debug("pagediv: %s"%repr(pagediv))
@@ -296,6 +350,9 @@
if punditMode:
self._addPunditAttributes(pagediv, pageinfo, docinfo)
+ if gisMode:
+ self._addGisTags(pagediv, pageinfo, docinfo)
+
s = serialize(pagediv)
logging.debug("getTextPage done in %s"%(datetime.now()-startTime))
return s
@@ -308,36 +365,6 @@
if pagediv is not None:
return serialize(pagediv)
- # pureXml mode WTF?
- elif textmode == "pureXml":
- # the text is in body
- pagediv = dom.find(".//body")
- logging.debug("pagediv: %s"%repr(pagediv))
- if pagediv is not None:
- return serialize(pagediv)
-
- # gis mode FIXME!
- elif textmode == "gis":
- # the text is in div@class=text
- pagediv = dom.find(".//div[@class='text']")
- logging.debug("pagediv: %s"%repr(pagediv))
- if pagediv is not None:
- # fix empty div tags
- self._fixEmptyDivs(pagediv)
- # check all a-tags
- links = pagediv.findall(".//a")
- # add our URL as backlink
- selfurl = self.getLink()
- doc = base64.b64encode(selfurl)
- for l in links:
- href = l.get('href')
- if href:
- if href.startswith('http://mappit.mpiwg-berlin.mpg.de'):
- l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
- l.set('target', '_blank')
-
- return serialize(pagediv)
-
logging.error("getTextPage: error in text mode %s or in text!"%(textmode))
return None
@@ -408,10 +435,9 @@
return pagediv
def _addPunditAttributes(self, pagediv, pageinfo, docinfo):
- """add about attributes for pundit annotation tool"""
+ """add about-attributes to divs for pundit annotation tool"""
textid = docinfo.get('DRI', "fn=%s"%docinfo.get('documentPath', '???'))
pn = pageinfo.get('pn', '1')
- # TODO: use pn as well?
# check all div-tags
divs = pagediv.findall(".//div")
for d in divs:
@@ -425,6 +451,28 @@
return pagediv
+ def _addGisTags(self, pagediv, pageinfo, docinfo):
+ """turns span@class=place elements that have an id into a-tags linking to mappit; modifies pagediv in place and returns it (pageinfo is not used)"""
+ # use last part of documentPath as db-id (empty string if documentPath is missing)
+ docpath = docinfo.get('documentPath', '')
+ textid = docpath.split('/')[-1]
+ # add our URL (base64-encoded) as backlink in the doc parameter
+ selfurl = self.getLink()
+ doc = base64.b64encode(selfurl)
+ # check all span@class=place; spans without an id are left unchanged
+ spans = pagediv.findall(".//span[@class='place']")
+ for s in spans:
+ id = s.get('id')
+ if id:
+ # make links like http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/songy_tiang_zh_1637?id=N400061-02&doc=aHR...&format=gis
+ s.tag = 'a'
+ # TODO: make links configurable
+ url = "http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?id=%s&doc=%s&format=gis"%(textid,id,doc)
+ s.set('href', url)
+ s.set('target', '_blank')
+
+ return pagediv
+
def _processFigures(self, pagediv, docinfo):
"""processes figure-tags"""
# unfortunately etree can not select class.startswith('figure')
@@ -713,4 +761,4 @@
if RESPONSE is not None:
RESPONSE.redirect('manage_main')
-
\ No newline at end of file
+
diff -r 7962e6891d99 -r 0488cd12355b documentViewer.py
--- a/documentViewer.py Tue Jan 15 18:15:32 2013 +0100
+++ b/documentViewer.py Mon Jan 21 19:58:21 2013 +0100
@@ -205,7 +205,7 @@
return self.template.fulltextclient.getRepositoryType(**args)
def getTextDownloadUrl(self, **args):
- """get list of gis places on one page"""
+ """get URL to download the full text"""
return self.template.fulltextclient.getTextDownloadUrl(**args)
def getPlacesOnPage(self, **args):