Mercurial > hg > documentViewer
annotate MpiwgXmlTextServer.py @ 626:7dafe8283312
make sslify accessible for templates.
author | casties |
---|---|
date | Mon, 15 Dec 2014 17:31:08 +0100 |
parents | c57d80a649ea |
children | 4a75a760def2 |
rev | line source |
---|---|
564 | 1 from OFS.SimpleItem import SimpleItem |
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile | |
3 | |
4 import xml.etree.ElementTree as ET | |
5 | |
6 import re | |
7 import logging | |
8 import urllib | |
9 import urlparse | |
10 import base64 | |
11 | |
576 | 12 from datetime import datetime |
13 | |
613
c57d80a649ea
CLOSED - # 281: List of thumbnails verschluckt Seite, wenn odd-scan-position gesetzt ist
casties
parents:
610
diff
changeset
|
14 from SrvTxtUtils import getInt, getText, getHttpData, serialize |
564 | 15 |
# Mapping of field names in the output of /mpiwg-mpdl-cms-web/query/GetDocInfo
# (keys) to the keys used in the documentViewer docinfo dict (values).
# Used by getTextInfo() to copy counts from the server response.
textinfoFieldMap = {
    'countPages' : 'numTextPages',
    'countFigures' : 'numFigureEntries',
    'countNotesHandwritten' : 'numHandwritten',
    'countNotes' : 'numNotes',
    'countPlaces' : 'numPlaces',
    'countTocEntries' : 'numTocEntries'
    }
25 | |
564 | 26 |
class MpiwgXmlTextServer(SimpleItem):
    """TextServer implementation for MPIWG-XML server.

    Fetches full-text pages, document info, table of contents and search
    results from an mpiwg-mpdl-cms-web backend via HTTP.
    """
    # Zope meta type shown in the ZMI
    meta_type = "MPIWG-XML TextServer"

    # ZMI management tabs: our config form plus the SimpleItem defaults
    manage_options = (
        {'label':'Config','action':'manage_changeMpiwgXmlTextServerForm'},
        )+SimpleItem.manage_options

    # page template rendering the configuration form
    manage_changeMpiwgXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpiwgXmlTextServer", globals())
37 def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpiwg-mpdl-cms-web/", timeout=40, serverName=None, repositoryType='production'): | |
38 """constructor""" | |
39 self.id=id | |
40 self.title=title | |
41 self.timeout = timeout | |
42 self.repositoryType = repositoryType | |
43 if serverName is None: | |
44 self.serverUrl = serverUrl | |
45 else: | |
46 self.serverUrl = "http://%s/mpiwg-mpdl-cms-web/"%serverName | |
47 | |
48 def getHttpData(self, url, data=None): | |
49 """returns result from url+data HTTP request""" | |
50 return getHttpData(url,data,timeout=self.timeout) | |
51 | |
52 def getServerData(self, method, data=None): | |
53 """returns result from text server for method+data""" | |
54 url = self.serverUrl+method | |
55 return getHttpData(url,data,timeout=self.timeout) | |
56 | |
57 | |
    def getRepositoryType(self):
        """returns the repository type, e.g. 'production'"""
        # getattr with default: instances created before this attribute
        # existed may not have it persisted
        return getattr(self, 'repositoryType', None)
564 | 61 |
62 def getTextDownloadUrl(self, type='xml', docinfo=None): | |
63 """returns a URL to download the current text""" | |
64 docpath = docinfo.get('textURLPath', None) | |
65 if not docpath: | |
66 return None | |
67 | |
68 docpath = docpath.replace('.xml','.'+type) | |
69 url = '%sdoc/GetDocument?id=%s'%(self.serverUrl.replace('interface/',''), docpath) | |
70 return url | |
71 | |
72 | |
73 def getPlacesOnPage(self, docinfo=None, pn=None): | |
74 """Returns list of GIS places of page pn""" | |
610 | 75 logging.debug("getPlacesOnPage(pn=%s"%pn) |
76 if not 'places' in docinfo: | |
77 self.getTextInfo('places', docinfo) | |
78 | |
79 allplaces = docinfo.get('places', None) | |
80 if len(allplaces) == 0: | |
81 return [] | |
82 | |
83 # search for places on this page TODO: is there a better way? | |
84 places = [p for p in allplaces if p['pn'] == pn] | |
85 return places | |
86 """OLD: | |
564 | 87 docpath = docinfo.get('textURLPath',None) |
88 if not docpath: | |
89 return None | |
90 | |
91 places=[] | |
92 text=self.getServerData("xpath.xql", "document=%s&xpath=//place&pn=%s"%(docpath,pn)) | |
93 dom = ET.fromstring(text) | |
94 result = dom.findall(".//resultPage/place") | |
95 for l in result: | |
96 id = l.get("id") | |
97 name = l.text | |
98 place = {'id': id, 'name': name} | |
99 places.append(place) | |
100 | |
610 | 101 return places""" |
564 | 102 |
103 | |
    def getTextInfo(self, mode=None, docinfo=None):
        """reads document info, including page concordance, from text server

        mode selects an additional info list ('pages', 'toc', 'figures',
        'notes', 'handwritten', 'places'); any other value is treated as None
        and fetches only the general counts.  Results are cached in docinfo
        and the (updated) docinfo dict is returned.
        """
        logging.debug("getTextInfo mode=%s"%mode)

        field = ''
        if mode in ['pages', 'toc', 'figures', 'notes', 'handwritten', 'places']:
            # translate mode to field param
            if mode == 'handwritten':
                field = '&field=notesHandwritten'
            else:
                field = '&field=%s'%mode
        else:
            mode = None

        # check cached info
        if mode:
            # cached toc-request?
            if 'full_%s'%mode in docinfo:
                return docinfo

        else:
            # cached but no toc-request?
            if 'numTextPages' in docinfo:
                return docinfo

        docpath = docinfo.get('textURLPath', None)
        if docpath is None:
            logging.error("getTextInfo: no textURLPath!")
            return docinfo

        # fetch docinfo from the server; field is appended to the query
        pagexml = self.getServerData("query/GetDocInfo","docId=%s%s"%(docpath,field))
        dom = ET.fromstring(pagexml)
        # all info in tag <doc>
        doc = dom
        if doc is None:
            logging.error("getTextInfo: unable to find document-tag!")
        else:
            if mode is None:
                # get general info from system-tag
                sys = doc.find('system')
                if sys is not None:
                    for (k,v) in textinfoFieldMap.items():
                        # copy into docinfo (even if empty)
                        docinfo[v] = getInt(getText(sys.find(k)))

            else:
                # result is in list-tag
                l = doc.find('list')
                if l is not None:
                    # look for general info
                    for (k,v) in textinfoFieldMap.items():
                        # copy into docinfo (only if not empty)
                        s = doc.find(k)
                        if s is not None:
                            docinfo[v] = getInt(getText(s))

                    # list type determines how the items are parsed
                    lt = l.get('type')
                    #
                    # pageNumbers
                    #
                    if lt == 'pages':
                        # contains tags with page numbers
                        # <item n="14" o="2" o-norm="2" file="0014"/>
                        # n=scan number, o=original page no, on=normalized original page no
                        # pageNumbers is a dict indexed by scan number
                        pages = {}
                        for i in l:
                            page = {}
                            pn = getInt(i.get('n'))
                            page['pn'] = pn
                            no = i.get('o')
                            page['no'] = no
                            non = i.get('o-norm')
                            page['non'] = non

                            # skip items without a valid scan number
                            if pn > 0:
                                pages[pn] = page

                        docinfo['pageNumbers'] = pages

                    #
                    # toc
                    #
                    elif lt in ['toc', 'figures', 'notes', 'notesHandwritten']:
                        # contains tags with table of contents/figures
                        # <item n="2.1." lv="2">CAP.I. <ref o="119">132</ref></item>
                        tocs = []
                        for te in l:
                            if te.tag == 'item':
                                toc = {}
                                toc['level-string'] = te.get('n')
                                toc['level'] = te.get('lv')
                                toc['content'] = te.text.strip()
                                # the <ref> child carries the page numbers
                                ref = te.find('ref')
                                toc['pn'] = getInt(ref.text)
                                toc['no'] = ref.get('o')
                                toc['non'] = ref.get('o-norm')
                                tocs.append(toc)

                        # save as full_toc/full_figures
                        docinfo['full_%s'%mode] = tocs

                    #
                    # places
                    #
                    elif lt in ['places']:
                        # contains tags with place-ids
                        # <item id="N40004F-01"><ref>4</ref></item>
                        places = []
                        for p in l:
                            if p.tag == 'item':
                                place = {}
                                place['id'] = p.get('id')
                                ref = p.find('ref')
                                place['pn'] = getInt(ref.text)
                                places.append(place)

                        docinfo['places'] = places

        return docinfo
228 | |
229 | |
    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):
        """returns single page from fulltext

        mode is a comma-separated combination of 'text'/'dict'/'xml'/'gis'/
        'search'/'pundit'; pn is the page (scan) number.  Returns serialized
        HTML (or XML body), or None on error.
        """

        logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
        startTime = datetime.now()
        # check for cached text -- but ideally this shouldn't be called twice
        if pageinfo.has_key('textPage'):
            logging.debug("getTextPage: using cached text")
            return pageinfo['textPage']

        docpath = docinfo.get('textURLPath', None)
        if not docpath:
            return None

        # stuff for constructing full urls
        selfurl = docinfo['viewerUrl']
        textParams = {'docId': docpath,
                      'page': pn}

        # character normalization mode requested by the UI
        normMode = pageinfo.get('characterNormalization', 'reg')
        # TODO: change values in form
        if normMode == 'regPlusNorm':
            normMode = 'norm'

        # TODO: this should not be necessary when the backend is fixed
        #textParams['normalization'] = normMode

        if not mode:
            # default is dict
            mode = 'text'

        modes = mode.split(',')
        # check for multiple layers
        if len(modes) > 1:
            logging.debug("getTextPage: more than one mode=%s"%mode)

        # mode defaults
        gisMode = False
        punditMode = False

        # search mode
        if 'search' in modes:
            # add highlighting
            highlightQuery = pageinfo.get('highlightQuery', None)
            if highlightQuery:
                textParams['highlightQuery'] = highlightQuery
                textParams['highlightElem'] = pageinfo.get('highlightElement', '')
                textParams['highlightElemPos'] = pageinfo.get('highlightElementPos', '')

            # ignore mode in the following
            modes.remove('search')

        # pundit mode
        if 'pundit' in modes:
            punditMode = True
            # ignore mode in the following
            modes.remove('pundit')

        # other modes don't combine
        if 'dict' in modes:
            textmode = 'dict'
            textParams['outputFormat'] = 'html'
        elif 'xml' in modes:
            textmode = 'xml'
            textParams['outputFormat'] = 'xmlDisplay'
            # xml display always shows the original text layer
            normMode = 'orig'
        elif 'gis' in modes:
            gisMode = True
            # gis mode uses plain text
            textmode = 'plain'
            textParams['outputFormat'] = 'html'
        else:
            # text is default mode
            textmode = 'plain'
            textParams['outputFormat'] = 'html'

        try:
            # fetch the page
            pagexml = self.getServerData("query/GetPage",urllib.urlencode(textParams))
            dom = ET.fromstring(pagexml)
        except Exception, e:
            logging.error("Error reading page: %s"%e)
            return None

        # plain text or text-with-links mode
        if textmode == 'plain' or textmode == 'dict':
            # the text is in div@class=text
            pagediv = dom.find(".//div[@class='text']")
            logging.debug("pagediv: %s"%repr(pagediv))
            if pagediv is not None:
                # add textmode and normMode classes
                #pagediv.set('class', 'text %s %s'%(textmode, normMode))
                # reduce w-spans to the requested text layer
                self._processWTags(textmode, normMode, pagediv)
                #self._processPbTag(pagediv, pageinfo)
                # rewrite figure images/links to use digilib
                self._processFigures(pagediv, docinfo)
                #self._fixEmptyDivs(pagediv)
                # get full url assuming documentViewer is parent
                selfurl = self.getLink()
                # check all a-tags
                links = pagediv.findall('.//a')
                for l in links:
                    href = l.get('href')
                    if href:
                        # is link with href
                        linkurl = urlparse.urlparse(href)
                        if linkurl.path.endswith('GetDictionaryEntries'):
                            #TODO: replace wordInfo page
                            # add target to open new page
                            l.set('target', '_blank')

                if punditMode:
                    self._addPunditAttributes(pagediv, pageinfo, docinfo)

                if gisMode:
                    self._addGisTags(pagediv, pageinfo, docinfo)

                s = serialize(pagediv)
                logging.debug("getTextPage done in %s"%(datetime.now()-startTime))
                return s

        # xml mode
        elif textmode == "xml":
            # the text is in body
            pagediv = dom.find(".//body")
            logging.debug("pagediv: %s"%repr(pagediv))
            if pagediv is not None:
                return serialize(pagediv)

        # fall-through: nothing matched above
        logging.error("getTextPage: error in text mode %s or in text!"%(textmode))
        return None
565 | 360 |
    def _processWTags(self, textMode, normMode, pagediv):
        """selects the necessary information from w-spans and removes the rest from pagediv

        Each word is wrapped in a <span class="w"> containing both a
        dictionary link and plain spans for each normalization layer
        ('orig'/'reg'/'norm').  Depending on textMode ('dict' or not) and
        normMode, all but the wanted child are removed and the remaining
        tag names are suppressed (tag=None renders as bare text).
        """
        logging.debug("processWTags(textMode=%s,norm=%s,pagediv"%(repr(textMode),repr(normMode)))
        startTime = datetime.now()
        wtags = pagediv.findall(".//span[@class='w']")
        for wtag in wtags:
            if textMode == 'dict':
                # delete non-a-tags
                wtag.remove(wtag.find("span[@class='nodictionary orig']"))
                wtag.remove(wtag.find("span[@class='nodictionary reg']"))
                wtag.remove(wtag.find("span[@class='nodictionary norm']"))
                # delete non-matching children of a-tag and suppress remaining tag name
                atag = wtag.find("*[@class='dictionary']")
                if normMode == 'orig':
                    atag.remove(atag.find("span[@class='reg']"))
                    atag.remove(atag.find("span[@class='norm']"))
                    atag.find("span[@class='orig']").tag = None
                elif normMode == 'reg':
                    atag.remove(atag.find("span[@class='orig']"))
                    atag.remove(atag.find("span[@class='norm']"))
                    atag.find("span[@class='reg']").tag = None
                elif normMode == 'norm':
                    atag.remove(atag.find("span[@class='orig']"))
                    atag.remove(atag.find("span[@class='reg']"))
                    atag.find("span[@class='norm']").tag = None

            else:
                # delete a-tag
                wtag.remove(wtag.find("*[@class='dictionary']"))
                # delete non-matching children and suppress remaining tag name
                if normMode == 'orig':
                    wtag.remove(wtag.find("span[@class='nodictionary reg']"))
                    wtag.remove(wtag.find("span[@class='nodictionary norm']"))
                    wtag.find("span[@class='nodictionary orig']").tag = None
                elif normMode == 'reg':
                    wtag.remove(wtag.find("span[@class='nodictionary orig']"))
                    wtag.remove(wtag.find("span[@class='nodictionary norm']"))
                    wtag.find("span[@class='nodictionary reg']").tag = None
                elif normMode == 'norm':
                    wtag.remove(wtag.find("span[@class='nodictionary orig']"))
                    wtag.remove(wtag.find("span[@class='nodictionary reg']"))
                    wtag.find("span[@class='nodictionary norm']").tag = None

            # suppress w-tag name
            wtag.tag = None

        logging.debug("processWTags in %s"%(datetime.now()-startTime))
        return pagediv
409 | |
566 | 410 def _processPbTag(self, pagediv, pageinfo): |
565 | 411 """extracts information from pb-tag and removes it from pagediv""" |
412 pbdiv = pagediv.find(".//span[@class='pb']") | |
413 if pbdiv is None: | |
414 logging.warning("getTextPage: no pb-span!") | |
415 return pagediv | |
416 | |
417 # extract running head | |
418 rh = pbdiv.find(".//span[@class='rhead']") | |
419 if rh is not None: | |
420 pageinfo['pageHeaderTitle'] = getText(rh) | |
421 | |
422 # remove pb-div from parent | |
423 ppdiv = pagediv.find(".//span[@class='pb']/..") | |
424 ppdiv.remove(pbdiv) | |
425 return pagediv | |
564 | 426 |
565 | 427 def _addPunditAttributes(self, pagediv, pageinfo, docinfo): |
610 | 428 """add about-attributes to divs for pundit annotation tool""" |
564 | 429 textid = docinfo.get('DRI', "fn=%s"%docinfo.get('documentPath', '???')) |
430 pn = pageinfo.get('pn', '1') | |
431 # check all div-tags | |
432 divs = pagediv.findall(".//div") | |
433 for d in divs: | |
434 id = d.get('id') | |
435 if id: | |
566 | 436 # TODO: check path (cf RFC2396) |
564 | 437 d.set('about', "http://echo.mpiwg-berlin.mpg.de/%s/pn=%s/#%s"%(textid,pn,id)) |
438 cls = d.get('class','') | |
439 cls += ' pundit-content' | |
440 d.set('class', cls.strip()) | |
441 | |
442 return pagediv | |
443 | |
610 | 444 def _addGisTags(self, pagediv, pageinfo, docinfo): |
445 """add links for gis places""" | |
446 # use last part of documentPath as db-id | |
447 docpath = docinfo.get('documentPath', '') | |
448 textid = docpath.split('/')[-1] | |
449 # add our URL as backlink | |
450 selfurl = self.getLink() | |
451 doc = base64.b64encode(selfurl) | |
452 # check all span@class=place | |
453 spans = pagediv.findall(".//span[@class='place']") | |
454 for s in spans: | |
455 id = s.get('id') | |
456 if id: | |
457 # make links like http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/songy_tiang_zh_1637?id=N400061-02&doc=aHR...&format=gis | |
458 s.tag = 'a' | |
459 # TODO: make links configurable | |
460 url = "http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?id=%s&doc=%s&format=gis"%(textid,id,doc) | |
461 s.set('href', url) | |
462 s.set('target', '_blank') | |
463 | |
464 return pagediv | |
465 | |
566 | 466 def _processFigures(self, pagediv, docinfo): |
467 """processes figure-tags""" | |
576 | 468 # unfortunately etree can not select class.startswith('figure') |
469 divs = pagediv.findall(".//span[@class]") | |
566 | 470 scalerUrl = docinfo['digilibScalerUrl'] |
471 viewerUrl = docinfo['digilibViewerUrl'] | |
472 for d in divs: | |
576 | 473 if not d.get('class').startswith('figure'): |
474 continue | |
475 | |
566 | 476 try: |
477 a = d.find('a') | |
478 img = a.find('img') | |
479 imgsrc = img.get('src') | |
480 imgurl = urlparse.urlparse(imgsrc) | |
481 imgq = imgurl.query | |
482 imgparams = urlparse.parse_qs(imgq) | |
483 fn = imgparams.get('fn', None) | |
484 if fn is not None: | |
485 # parse_qs puts parameters in lists | |
486 fn = fn[0] | |
487 # TODO: check valid path | |
488 # fix img@src | |
489 newsrc = '%s?fn=%s&dw=200&dh=200'%(scalerUrl,fn) | |
490 img.set('src', newsrc) | |
491 # fix a@href | |
492 newlink = '%s?fn=%s'%(viewerUrl,fn) | |
493 a.set('href', newlink) | |
494 a.set('target', '_blank') | |
495 | |
496 except: | |
497 logging.warn("processFigures: strange figure!") | |
498 | |
583 | 499 |
500 def _cleanSearchResult(self, pagediv): | |
501 """fixes search result html (change pbs and figures)""" | |
502 # replace figure-tag with figureNumText | |
503 for fig in pagediv.findall(".//span[@class='figure']"): | |
504 txt = fig.findtext(".//span[@class='figureNumText']") | |
505 tail = fig.tail | |
506 fig.clear() | |
507 fig.set('class', 'figure') | |
508 fig.text = txt | |
509 fig.tail = tail | |
510 | |
511 # replace lb-tag with "//" | |
512 for lb in pagediv.findall(".//br[@class='lb']"): | |
513 lb.tag = 'span' | |
514 lb.text = '//' | |
515 | |
516 # replace pb-tag with "///" | |
517 for pb in pagediv.findall(".//span[@class='pb']"): | |
518 tail = pb.tail | |
519 pb.clear() | |
520 pb.set('class', 'pb') | |
521 pb.text = '///' | |
522 pb.tail = tail | |
523 | |
524 return pagediv | |
525 | |
526 def _cleanSearchResult2(self, pagediv): | |
527 """fixes search result html (change pbs and figures)""" | |
528 # unfortunately etree can not select class.startswith('figure') | |
529 divs = pagediv.findall(".//span[@class]") | |
530 for d in divs: | |
531 cls = d.get('class') | |
532 if cls.startswith('figure'): | |
533 # replace figure-tag with figureNumText | |
534 txt = d.findtext(".//span[@class='figureNumText']") | |
535 d.clear() | |
536 d.set('class', 'figure') | |
537 d.text = txt | |
538 | |
539 elif cls.startswith('pb'): | |
540 # replace pb-tag with "//" | |
541 d.clear() | |
542 d.set('class', 'pb') | |
543 d.text = '//' | |
544 | |
545 return pagediv | |
546 | |
547 | |
566 | 548 |
565 | 549 def _fixEmptyDivs(self, pagediv): |
550 """fixes empty div-tags by inserting a space""" | |
551 divs = pagediv.findall('.//div') | |
552 for d in divs: | |
553 if len(d) == 0 and not d.text: | |
554 # make empty divs non-empty | |
555 d.text = ' ' | |
556 | |
557 return pagediv | |
558 | |
559 | |
    def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
        """loads list of search results and stores XML in docinfo

        Results are cached in docinfo under 'results'/'resultSize', keyed by
        a 'cachedQuery' string of mode, query and normalization mode; a
        changed query invalidates the cache.  Returns the updated docinfo.
        """
        normMode = pageinfo.get('characterNormalization', 'reg')
        logging.debug("getSearchResults mode=%s query=%s norm=%s"%(mode, query, normMode))
        if mode == "none":
            return docinfo

        #TODO: put mode into query

        cachedQuery = docinfo.get('cachedQuery', None)
        if cachedQuery is not None:
            # cached search result
            if cachedQuery == '%s_%s_%s'%(mode,query,normMode):
                # same query
                return docinfo

            else:
                # different query - drop the stale cache entries
                del docinfo['resultSize']
                del docinfo['results']

        # cache query
        docinfo['cachedQuery'] = '%s_%s_%s'%(mode,query,normMode)

        # fetch full results (first 1000 hits in one go)
        docpath = docinfo['textURLPath']
        params = {'docId': docpath,
                  'query': query,
                  'pageSize': 1000,
                  'page': 1,
                  'outputFormat': 'html'}
        pagexml = self.getServerData("query/QueryDocument",urllib.urlencode(params))
        results = []
        try:
            dom = ET.fromstring(pagexml)
            # clean html output
            self._processWTags('plain', normMode, dom)
            self._cleanSearchResult(dom)
            # page content is currently in multiple <td align=left>
            alldivs = dom.findall(".//tr[@class='hit']")
            for div in alldivs:
                # change tr to div
                div.tag = 'div'
                # change td to span
                for d in div.findall('td'):
                    d.tag = 'span'

                # TODO: can we put etree in the session?
                results.append(div)

        except Exception, e:
            # parse error: keep whatever results were collected so far
            logging.error("GetSearchResults: Error parsing search result: %s"%e)

        # store results in docinfo
        docinfo['resultSize'] = len(results)
        docinfo['results'] = results

        return docinfo
618 | |
619 | |
620 def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None): | |
583 | 621 """returns single page from the list of search results""" |
564 | 622 logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) |
623 # get (cached) result | |
624 self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) | |
625 | |
568 | 626 resultxml = docinfo.get('results', None) |
564 | 627 if not resultxml: |
568 | 628 logging.error("getResultPage: unable to find results") |
564 | 629 return "Error: no result!" |
630 | |
631 if size is None: | |
632 size = pageinfo.get('resultPageSize', 10) | |
633 | |
634 if start is None: | |
635 start = (pn - 1) * size | |
636 | |
576 | 637 if resultxml is not None: |
564 | 638 # paginate |
639 first = start-1 | |
576 | 640 last = first+size |
641 tocdivs = resultxml[first:last] | |
564 | 642 |
576 | 643 toc = ET.Element('div', attrib={'class':'queryResultPage'}) |
644 for div in tocdivs: | |
645 # check all a-tags | |
646 links = div.findall(".//a") | |
647 for l in links: | |
648 href = l.get('href') | |
649 if href: | |
650 # assume all links go to pages | |
651 linkUrl = urlparse.urlparse(href) | |
652 linkParams = urlparse.parse_qs(linkUrl.query) | |
653 # take some parameters (make sure it works even if the link was already parsed) | |
654 params = {'pn': linkParams.get('page',linkParams.get('pn', None)), | |
655 'highlightQuery': linkParams.get('highlightQuery',None), | |
656 'highlightElement': linkParams.get('highlightElem',linkParams.get('highlightElement',None)), | |
657 'highlightElementPos': linkParams.get('highlightElemPos',linkParams.get('highlightElementPos',None)) | |
658 } | |
659 if not params['pn']: | |
660 logging.warn("getResultsPage: link has no page: %s"%href) | |
661 | |
662 url = self.getLink(params=params) | |
663 l.set('href', url) | |
564 | 664 |
576 | 665 toc.append(div) |
666 | |
667 return serialize(toc) | |
564 | 668 |
669 return "ERROR: no results!" | |
670 | |
671 | |
672 def getToc(self, mode='text', docinfo=None): | |
673 """returns list of table of contents from docinfo""" | |
674 logging.debug("getToc mode=%s"%mode) | |
675 if mode == 'text': | |
676 queryType = 'toc' | |
677 else: | |
678 queryType = mode | |
679 | |
680 if not 'full_%s'%queryType in docinfo: | |
681 # get new toc | |
682 docinfo = self.getTextInfo(queryType, docinfo) | |
683 | |
684 return docinfo.get('full_%s'%queryType, []) | |
685 | |
568 | 686 |
564 | 687 def getTocPage(self, mode='text', pn=None, start=None, size=None, pageinfo=None, docinfo=None): |
688 """returns single page from the table of contents""" | |
689 logging.debug("getTocPage mode=%s, pn=%s start=%s size=%s"%(mode,repr(pn),repr(start),repr(size))) | |
690 fulltoc = self.getToc(mode=mode, docinfo=docinfo) | |
691 if len(fulltoc) < 1: | |
692 logging.error("getTocPage: unable to find toc!") | |
693 return "Error: no table of contents!" | |
694 | |
695 if size is None: | |
696 size = pageinfo.get('tocPageSize', 30) | |
697 | |
698 if start is None: | |
699 start = (pn - 1) * size | |
700 | |
701 # paginate | |
702 first = (start - 1) | |
703 last = first + size | |
704 tocs = fulltoc[first:last] | |
705 tp = '<div>' | |
609 | 706 label = {'figures': 'Figure', 'notes': 'Note', 'handwritten': 'Handwritten note'}.get(mode, 'Item') |
564 | 707 for toc in tocs: |
708 pageurl = self.getLink('pn', toc['pn']) | |
709 tp += '<div class="tocline">' | |
568 | 710 content = toc['content'] |
609 | 711 lvs = toc['level-string'] |
568 | 712 if content: |
609 | 713 tp += '<div class="toc name">[%s] %s</div>'%(lvs, toc['content']) |
714 elif lvs: | |
715 tp += '<div class="toc name">[%s %s]</div>'%(label, lvs) | |
568 | 716 else: |
609 | 717 tp += '<div class="toc name">[%s]</div>'%(label) |
568 | 718 |
719 if toc.get('no', None): | |
720 tp += '<div class="toc page"><a href="%s">Page: %s (%s)</a></div>'%(pageurl, toc['pn'], toc['no']) | |
721 else: | |
722 tp += '<div class="toc page"><a href="%s">Page: %s</a></div>'%(pageurl, toc['pn']) | |
723 | |
564 | 724 tp += '</div>\n' |
725 | |
726 tp += '</div>\n' | |
727 | |
728 return tp | |
729 | |
730 | |
731 def manage_changeMpiwgXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,repositoryType=None,RESPONSE=None): | |
732 """change settings""" | |
733 self.title=title | |
734 self.timeout = timeout | |
735 self.serverUrl = serverUrl | |
736 if repositoryType: | |
737 self.repositoryType = repositoryType | |
738 if RESPONSE is not None: | |
739 RESPONSE.redirect('manage_main') | |
740 | |
741 # management methods | |
def manage_addMpiwgXmlTextServerForm(self):
    """Form for adding"""
    # render the add-form template in the context of the caller
    pt = PageTemplateFile("zpt/manage_addMpiwgXmlTextServer", globals()).__of__(self)
    return pt()
746 | |
def manage_addMpiwgXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """add MpiwgXmlTextServer"""
    # create the object and register it in the destination folder
    newObj = MpiwgXmlTextServer(id=id, title=title, serverUrl=serverUrl, timeout=timeout)
    self.Destination()._setObject(id, newObj)
    # redirect back to the management screen when called via the ZMI
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
753 | |
610 | 754 |