Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: documentViewer/MpiwgXmlTextServer.py

Last change on this file was 632:4a75a760def2, checked in by Dirk Wintergruen <dwinter@…>, 9 years ago
dictionary handling added
File size: 30.2 KB

Line
1	from OFS.SimpleItem import SimpleItem
2	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
3
4	import xml.etree.ElementTree as ET
5
6	import re
7	import logging
8	import urllib
9	import urlparse
10	import base64
11
12	from datetime import datetime
13
14	from SrvTxtUtils import getInt, getText, getHttpData, serialize
15
# Mapping of fields in the output of /mpiwg-mpdl-cms-web/query/GetDocInfo
# (keys) to the corresponding documentViewer docinfo keys (values).
textinfoFieldMap = {
    'countPages': 'numTextPages',
    'countFigures': 'numFigureEntries',
    'countNotesHandwritten': 'numHandwritten',
    'countNotes': 'numNotes',
    'countPlaces': 'numPlaces',
    'countTocEntries': 'numTocEntries',
}
25
26
27	class MpiwgXmlTextServer(SimpleItem):
28	"""TextServer implementation for MPIWG-XML server"""
29	meta_type="MPIWG-XML TextServer"
30
31	manage_options=(
32	{'label':'Config','action':'manage_changeMpiwgXmlTextServerForm'},
33	)+SimpleItem.manage_options
34
35	manage_changeMpiwgXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpiwgXmlTextServer", globals())
36
def __init__(self, id, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpiwg-mpdl-cms-web/", timeout=40, serverName=None, repositoryType='production'):
    """Initialize the text server object.

    id, title -- stored unchanged as attributes of the same name.
    serverUrl -- base URL of the text server; used only when serverName
        is None.
    timeout -- stored as self.timeout and passed to the HTTP helper.
    serverName -- optional host name; when given, the base URL is built
        as "http://<serverName>/mpiwg-mpdl-cms-web/" and serverUrl is
        ignored.
    repositoryType -- stored as self.repositoryType.
    """
    self.id = id
    self.title = title
    self.timeout = timeout
    self.repositoryType = repositoryType
    if serverName is None:
        self.serverUrl = serverUrl
    else:
        # a bare host name takes precedence over a full URL
        self.serverUrl = "http://%s/mpiwg-mpdl-cms-web/" % serverName
47
def getHttpData(self, url, data=None):
    """Return the result of an HTTP request for url+data.

    Delegates to the getHttpData helper imported from SrvTxtUtils and
    passes the configured self.timeout.
    """
    return getHttpData(url, data, timeout=self.timeout)
51
def getServerData(self, method, data=None):
    """Return the result from the text server for method+data.

    method -- path below self.serverUrl, e.g. "query/GetPage".
    data -- request data handed on unchanged to the HTTP helper.
    """
    url = self.serverUrl + method
    # go through the instance's own wrapper so the timeout handling
    # lives in exactly one place
    return self.getHttpData(url, data)
56
57
def getRepositoryType(self):
    """Return the repository type, e.g. 'production'.

    Returns None when the instance has no repositoryType attribute.
    """
    return getattr(self, 'repositoryType', None)
61
def getTextDownloadUrl(self, type='xml', docinfo=None):
    """Return a URL to download the current text, or None.

    type -- file extension of the requested format (default 'xml').
    docinfo -- dict with the document path under 'textURLPath'.

    Returns None when docinfo is missing or has no text path.
    """
    if docinfo is None:
        return None

    docpath = docinfo.get('textURLPath', None)
    if not docpath:
        return None

    # swap only the trailing extension; a '.xml' that happens to occur
    # inside a directory name must stay untouched
    if docpath.endswith('.xml'):
        docpath = docpath[:-len('.xml')] + '.' + type

    baseUrl = self.serverUrl.replace('interface/', '')
    return '%sdoc/GetDocument?id=%s' % (baseUrl, docpath)
71
72
def getPlacesOnPage(self, docinfo=None, pn=None):
    """Return the list of GIS places on page pn.

    docinfo -- document info dict; the place list is loaded into it via
        getTextInfo('places', ...) when it has no 'places' entry yet.
    pn -- page number compared against each place's 'pn' value.

    Returns an empty list when there is no docinfo or no place list.
    """
    logging.debug("getPlacesOnPage(pn=%s)", pn)
    if docinfo is None:
        return []

    if 'places' not in docinfo:
        self.getTextInfo('places', docinfo)

    # getTextInfo may not have been able to load any places
    allplaces = docinfo.get('places', None)
    if not allplaces:
        return []

    # search for places on this page TODO: is there a better way?
    return [p for p in allplaces if p['pn'] == pn]
102
103
def getTextInfo(self, mode=None, docinfo=None):
    """Read document info, including page concordance, from the text server.

    mode -- None for the general counters, or one of 'pages', 'toc',
        'figures', 'notes', 'handwritten', 'places' for the matching
        list; any other value is treated like None.
    docinfo -- dict that is updated in place and returned.

    Results are stored in docinfo as follows:
      general counters     -> keys named in textinfoFieldMap
      'pages'              -> docinfo['pageNumbers'] (dict by scan number)
      'toc', 'figures', 'notes', 'handwritten'
                           -> docinfo['full_<mode>'] (list of dicts)
      'places'             -> docinfo['places'] (list of dicts)

    Errors while fetching or parsing are logged and docinfo is returned
    unchanged.
    """
    logging.debug("getTextInfo mode=%s", mode)

    field = ''
    if mode in ['pages', 'toc', 'figures', 'notes', 'handwritten', 'places']:
        # translate mode to field param
        if mode == 'handwritten':
            field = '&field=notesHandwritten'
        else:
            field = '&field=%s' % mode
    else:
        mode = None

    # check cached info
    if mode:
        # 'pages' and 'places' are stored under their own keys, all
        # other lists under 'full_<mode>'
        cacheKey = {'pages': 'pageNumbers', 'places': 'places'}.get(mode, 'full_%s' % mode)
        if cacheKey in docinfo:
            return docinfo
    elif 'numTextPages' in docinfo:
        # general info is already there
        return docinfo

    docpath = docinfo.get('textURLPath', None)
    if docpath is None:
        logging.error("getTextInfo: no textURLPath!")
        return docinfo

    # fetch docinfo; all info is in the root <doc> tag
    try:
        pagexml = self.getServerData("query/GetDocInfo", "docId=%s%s" % (docpath, field))
        doc = ET.fromstring(pagexml)
    except Exception as e:
        logging.error("getTextInfo: error reading document info: %s", e)
        return docinfo

    if mode is None:
        # get general info from system-tag
        sysinfo = doc.find('system')
        if sysinfo is not None:
            for (k, v) in textinfoFieldMap.items():
                # copy into docinfo (even if empty)
                docinfo[v] = getInt(getText(sysinfo.find(k)))

        return docinfo

    # result is in list-tag
    l = doc.find('list')
    if l is None:
        return docinfo

    # look for general info
    for (k, v) in textinfoFieldMap.items():
        # copy into docinfo (only if not empty)
        s = doc.find(k)
        if s is not None:
            docinfo[v] = getInt(getText(s))

    lt = l.get('type')
    if lt == 'pages':
        #
        # pageNumbers
        #
        # contains tags with page numbers
        # <item n="14" o="2" o-norm="2" file="0014"/>
        # n=scan number, o=original page no, o-norm=normalized original page no
        # pageNumbers is a dict indexed by scan number
        pages = {}
        for i in l:
            pn = getInt(i.get('n'))
            if pn > 0:
                pages[pn] = {'pn': pn, 'no': i.get('o'), 'non': i.get('o-norm')}

        docinfo['pageNumbers'] = pages

    elif lt in ['toc', 'figures', 'notes', 'notesHandwritten']:
        #
        # toc
        #
        # contains tags with table of contents/figures
        # <item n="2.1." lv="2">CAP.I. <ref o="119">132</ref></item>
        tocs = []
        for te in l:
            if te.tag != 'item':
                continue

            ref = te.find('ref')
            if ref is None:
                # without a page reference the entry can not be linked
                logging.warning("getTextInfo: %s item without ref: %s", lt, te.get('n'))
                continue

            tocs.append({
                'level-string': te.get('n'),
                'level': te.get('lv'),
                # items may have no text before the ref-tag
                'content': (te.text or '').strip(),
                'pn': getInt(ref.text),
                'no': ref.get('o'),
                'non': ref.get('o-norm'),
            })

        # save as full_toc/full_figures
        docinfo['full_%s' % mode] = tocs

    elif lt == 'places':
        #
        # places
        #
        # contains tags with place-ids
        # <item id="N40004F-01"><ref>4</ref></item>
        places = []
        for p in l:
            if p.tag != 'item':
                continue

            ref = p.find('ref')
            if ref is None:
                logging.warning("getTextInfo: place item without ref: %s", p.get('id'))
                continue

            places.append({'id': p.get('id'), 'pn': getInt(ref.text)})

        docinfo['places'] = places

    return docinfo
228
229
def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None):
    """Return a single page of the fulltext as a serialized string, or None.

    mode -- comma separated list of layers; 'search' and 'pundit'
        combine with exactly one of 'dict', 'xml', 'gis' or the default
        plain text.
    pn -- page number sent to the text server as 'page'.
    docinfo -- document info dict ('textURLPath' is required).
    pageinfo -- page info dict; supplies 'characterNormalization' and
        the highlight parameters for search mode.

    Returns None when there is no text path, when the page can not be
    fetched or parsed, or when the expected container element is missing.
    """
    logging.debug("getTextPage mode=%s, pn=%s", mode, pn)
    startTime = datetime.now()
    if pageinfo is None:
        pageinfo = {}

    # check for cached text -- but ideally this shouldn't be called twice
    if 'textPage' in pageinfo:
        logging.debug("getTextPage: using cached text")
        return pageinfo['textPage']

    docpath = docinfo.get('textURLPath', None) if docinfo else None
    if not docpath:
        return None

    # parameters for the text server request
    textParams = {'docId': docpath,
                  'page': pn}

    normMode = pageinfo.get('characterNormalization', 'reg')
    # TODO: change values in form
    if normMode == 'regPlusNorm':
        normMode = 'norm'

    # TODO: this should not be necessary when the backend is fixed
    #textParams['normalization'] = normMode

    if not mode:
        # default is plain text
        mode = 'text'

    modes = mode.split(',')
    # check for multiple layers
    if len(modes) > 1:
        logging.debug("getTextPage: more than one mode=%s", mode)

    # mode defaults
    gisMode = False
    punditMode = False

    # search mode
    if 'search' in modes:
        # add highlighting
        highlightQuery = pageinfo.get('highlightQuery', None)
        if highlightQuery:
            textParams['highlightQuery'] = highlightQuery
            textParams['highlightElem'] = pageinfo.get('highlightElement', '')
            textParams['highlightElemPos'] = pageinfo.get('highlightElementPos', '')

        # ignore mode in the following
        modes.remove('search')

    # pundit mode
    if 'pundit' in modes:
        punditMode = True
        # ignore mode in the following
        modes.remove('pundit')

    # other modes don't combine
    if 'dict' in modes:
        textmode = 'dict'
        textParams['outputFormat'] = 'html'
    elif 'xml' in modes:
        textmode = 'xml'
        textParams['outputFormat'] = 'xmlDisplay'
    elif 'gis' in modes:
        gisMode = True
        # gis mode uses plain text
        textmode = 'plain'
        textParams['outputFormat'] = 'html'
    else:
        # text is default mode
        textmode = 'plain'
        textParams['outputFormat'] = 'html'

    try:
        # fetch the page
        pagexml = self.getServerData("query/GetPage", urllib.urlencode(textParams))
        dom = ET.fromstring(pagexml)
    except Exception as e:
        logging.error("Error reading page: %s", e)
        return None

    # plain text or text-with-links mode
    if textmode == 'plain' or textmode == 'dict':
        # the text is in div@class=text
        pagediv = dom.find(".//div[@class='text']")
        logging.debug("pagediv: %s", repr(pagediv))
        if pagediv is not None:
            self._processWTags(textmode, normMode, pagediv)
            self._processFigures(pagediv, docinfo)
            # check all a-tags
            for link in pagediv.findall('.//a'):
                href = link.get('href')
                if href:
                    # is link with href
                    linkurl = urlparse.urlparse(href)
                    if linkurl.path.endswith('GetDictionaryEntries'):
                        #TODO: replace wordInfo page
                        # add target to open new page
                        link.set('target', '_blank')

            if punditMode:
                self._addPunditAttributes(pagediv, pageinfo, docinfo)

            if gisMode:
                self._addGisTags(pagediv, pageinfo, docinfo)

            s = serialize(pagediv)
            logging.debug("getTextPage done in %s", datetime.now() - startTime)
            return s

    # xml mode
    elif textmode == "xml":
        # the text is in body
        pagediv = dom.find(".//body")
        logging.debug("pagediv: %s", repr(pagediv))
        if pagediv is not None:
            return serialize(pagediv)

    logging.error("getTextPage: error in text mode %s or in text!", textmode)
    return None
366
def _processWTags(self, textMode, normMode, pagediv):
    """Select the necessary information from w-spans and remove the rest.

    Every span@class='w' holds one word in several variants: a
    dictionary link with 'orig'/'reg'/'norm' child spans and three
    'nodictionary orig/reg/norm' spans.

    textMode -- 'dict' keeps the dictionary link, anything else keeps
        the plain 'nodictionary' variant.
    normMode -- 'orig', 'reg' or 'norm': the variant to keep; with any
        other value all variants are left in place.
    pagediv -- element that is modified in place and returned.

    Kept wrapper elements get tag=None so that only their content is
    written when the tree is serialized. Missing variants or a missing
    dictionary link are skipped instead of raising.
    """
    logging.debug("processWTags(textMode=%s,norm=%s,pagediv", repr(textMode), repr(normMode))
    startTime = datetime.now()
    variants = ('orig', 'reg', 'norm')

    def dropChild(parent, path):
        # remove the first child matching path, if there is one
        child = parent.find(path)
        if child is not None:
            parent.remove(child)

    def findDictLink(wtag):
        # exact class match first, then a-tags whose class attribute
        # merely starts with 'dictionary' (additional class entries)
        link = wtag.find("*[@class='dictionary']")
        if link is None:
            for cand in wtag.findall("a"):
                if cand.get("class", "").startswith("dictionary"):
                    return cand

        return link

    for wtag in pagediv.findall(".//span[@class='w']"):
        link = findDictLink(wtag)
        if textMode == 'dict':
            # delete non-a-tags and select inside the a-tag
            for v in variants:
                dropChild(wtag, "span[@class='nodictionary %s']" % v)

            container, prefix = link, ''
        else:
            # delete a-tag and select among the plain variants
            if link is not None:
                wtag.remove(link)

            container, prefix = wtag, 'nodictionary '

        if container is not None and normMode in variants:
            # delete non-matching children and suppress remaining tag name
            for v in variants:
                path = "span[@class='%s%s']" % (prefix, v)
                if v == normMode:
                    keep = container.find(path)
                    if keep is not None:
                        keep.tag = None
                else:
                    dropChild(container, path)

        # suppress w-tag name
        wtag.tag = None

    logging.debug("processWTags in %s", datetime.now() - startTime)
    return pagediv
441
def _processPbTag(self, pagediv, pageinfo):
    """Extract information from the pb-tag and remove it from pagediv.

    The text of a span@class='rhead' inside the first span@class='pb'
    is stored as pageinfo['pageHeaderTitle']; the pb-span itself is then
    removed from its parent. pagediv is returned in every case.
    """
    pbdiv = pagediv.find(".//span[@class='pb']")
    if pbdiv is None:
        logging.warning("getTextPage: no pb-span!")
        return pagediv

    # extract running head
    rh = pbdiv.find(".//span[@class='rhead']")
    if rh is not None:
        pageinfo['pageHeaderTitle'] = getText(rh)

    # remove pb-div from parent
    ppdiv = pagediv.find(".//span[@class='pb']/..")
    if ppdiv is not None:
        ppdiv.remove(pbdiv)

    return pagediv
458
def _addPunditAttributes(self, pagediv, pageinfo, docinfo):
    """Add about-attributes to divs for the pundit annotation tool.

    Every descendant div of pagediv that has an id gets an 'about' URL
    built from the text id, the page number and the div id, and the
    class 'pundit-content' appended to its class attribute.

    The text id is docinfo['DRI'] or, failing that,
    "fn=<docinfo['documentPath']>"; the page number is pageinfo['pn']
    (default '1'). pagediv is modified in place and returned.
    """
    textid = docinfo.get('DRI', "fn=%s" % docinfo.get('documentPath', '???'))
    pn = pageinfo.get('pn', '1')
    # check all div-tags
    for d in pagediv.findall(".//div"):
        divId = d.get('id')
        if not divId:
            continue

        # TODO: check path (cf RFC2396)
        d.set('about', "http://echo.mpiwg-berlin.mpg.de/%s/pn=%s/#%s" % (textid, pn, divId))
        cls = d.get('class', '') + ' pundit-content'
        d.set('class', cls.strip())

    return pagediv
475
def _addGisTags(self, pagediv, pageinfo, docinfo):
    """Add links for GIS places.

    Every span@class='place' with an id is turned into an a-tag that
    links to the mappit database and opens in a new window. The last
    part of docinfo['documentPath'] is used as database id and the
    base64-encoded result of self.getLink() is passed as backlink.
    pagediv is modified in place and returned; pageinfo is not used.
    """
    # use last part of documentPath as db-id
    docpath = docinfo.get('documentPath', '')
    textid = docpath.split('/')[-1]
    # add our URL as backlink
    # NOTE(review): the base64 value is put into the query string
    # unescaped although it can contain '+', '/' and '=' -- confirm that
    # the mappit server copes with that
    doc = base64.b64encode(self.getLink())
    # check all span@class=place
    for s in pagediv.findall(".//span[@class='place']"):
        placeId = s.get('id')
        if not placeId:
            continue

        # make links like http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/songy_tiang_zh_1637?id=N400061-02&doc=aHR...&format=gis
        s.tag = 'a'
        # TODO: make links configurable
        url = "http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?id=%s&doc=%s&format=gis" % (textid, placeId, doc)
        s.set('href', url)
        s.set('target', '_blank')

    return pagediv
497
def _processFigures(self, pagediv, docinfo):
    """Rewrite the image and link URLs of figure-tags.

    For every span whose class starts with 'figure' the 'fn' parameter
    of the contained img@src is taken over into
      img@src -> docinfo['digilibScalerUrl'] (200x200 thumbnail)
      a@href  -> docinfo['digilibViewerUrl'] (opened in a new window)
    Figures without the expected a/img structure are logged and skipped.
    """
    scalerUrl = docinfo['digilibScalerUrl']
    viewerUrl = docinfo['digilibViewerUrl']
    # unfortunately etree can not select class.startswith('figure')
    for d in pagediv.findall(".//span[@class]"):
        if not d.get('class').startswith('figure'):
            continue

        a = d.find('a')
        img = a.find('img') if a is not None else None
        imgsrc = img.get('src') if img is not None else None
        if not imgsrc:
            logging.warning("processFigures: strange figure!")
            continue

        imgparams = urlparse.parse_qs(urlparse.urlparse(imgsrc).query)
        fn = imgparams.get('fn', None)
        if fn is None:
            continue

        # parse_qs puts parameters in lists
        fn = fn[0]
        # TODO: check valid path
        # fix img@src
        img.set('src', '%s?fn=%s&dw=200&dh=200' % (scalerUrl, fn))
        # fix a@href
        a.set('href', '%s?fn=%s' % (viewerUrl, fn))
        a.set('target', '_blank')
530
531
def _cleanSearchResult(self, pagediv):
    """Fix search result html (change figures, line breaks and page breaks).

    - span@class='figure' is reduced to the text of its figureNumText
    - br@class='lb' becomes a span with the text "//"
    - span@class='pb' is reduced to the text "///"
    Text following the changed elements is kept. pagediv is modified in
    place and returned.
    """
    # replace figure-tag with figureNumText
    for fig in pagediv.findall(".//span[@class='figure']"):
        txt = fig.findtext(".//span[@class='figureNumText']")
        # clear() also wipes the tail, so save and restore it
        tail = fig.tail
        fig.clear()
        fig.set('class', 'figure')
        fig.text = txt
        fig.tail = tail

    # replace lb-tag with "//"
    for lb in pagediv.findall(".//br[@class='lb']"):
        lb.tag = 'span'
        lb.text = '//'

    # replace pb-tag with "///"
    for pb in pagediv.findall(".//span[@class='pb']"):
        tail = pb.tail
        pb.clear()
        pb.set('class', 'pb')
        pb.text = '///'
        pb.tail = tail

    return pagediv
557
def _cleanSearchResult2(self, pagediv):
    """Fix search result html (change pbs and figures).

    Variant of _cleanSearchResult that matches on the start of the
    class attribute:
    - spans whose class starts with 'figure' are reduced to the text of
      their figureNumText and get the class 'figure'
    - spans whose class starts with 'pb' are reduced to the text "//"
      and get the class 'pb'
    Text following the changed elements is kept. pagediv is modified in
    place and returned.
    """
    # unfortunately etree can not select class.startswith('figure')
    for d in pagediv.findall(".//span[@class]"):
        cls = d.get('class')
        if cls.startswith('figure'):
            # replace figure-tag with figureNumText
            newClass = 'figure'
            newText = d.findtext(".//span[@class='figureNumText']")
        elif cls.startswith('pb'):
            # replace pb-tag with "//"
            newClass = 'pb'
            newText = '//'
        else:
            continue

        # clear() also wipes the tail (the text following the element),
        # which must survive
        tail = d.tail
        d.clear()
        d.set('class', newClass)
        d.text = newText
        d.tail = tail

    return pagediv
578
579
580
def _fixEmptyDivs(self, pagediv):
    """Fix empty div-tags by inserting a space.

    A descendant div counts as empty when it has neither child elements
    nor text. pagediv is modified in place and returned.
    """
    for d in pagediv.findall('.//div'):
        if len(d) == 0 and not d.text:
            # make empty divs non-empty
            d.text = ' '

    return pagediv
590
591
def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
    """Load the list of search results and store it in docinfo.

    mode -- search mode; "none" returns docinfo unchanged.
    query -- query string sent to the text server.
    pageinfo -- page info dict, supplies 'characterNormalization'.
    docinfo -- document info dict; updated in place with
        'cachedQuery', 'resultSize' and 'results' (list of elements, one
        per hit) and returned.

    A result that is already stored for the same mode, query and
    normalization is reused. Errors while parsing the server response
    are logged and lead to an empty result list.
    """
    normMode = (pageinfo or {}).get('characterNormalization', 'reg')
    # same translation as in getTextPage; _processWTags only knows
    # 'orig', 'reg' and 'norm'
    if normMode == 'regPlusNorm':
        normMode = 'norm'

    logging.debug("getSearchResults mode=%s query=%s norm=%s", mode, query, normMode)
    if mode == "none":
        return docinfo

    #TODO: put mode into query

    cacheKey = '%s_%s_%s' % (mode, query, normMode)
    if docinfo.get('cachedQuery', None) == cacheKey:
        # same query: cached search result
        return docinfo

    # different query: forget the old result (parts of it may be
    # missing if an earlier request failed)
    for key in ('cachedQuery', 'resultSize', 'results'):
        if key in docinfo:
            del docinfo[key]

    # fetch full results
    docpath = docinfo['textURLPath']
    params = {'docId': docpath,
              'query': query,
              'pageSize': 1000,
              'page': 1,
              'outputFormat': 'html'}
    pagexml = self.getServerData("query/QueryDocument", urllib.urlencode(params))
    results = []
    try:
        dom = ET.fromstring(pagexml)
        # clean html output
        self._processWTags('plain', normMode, dom)
        self._cleanSearchResult(dom)
        # page content is currently in multiple <td align=left>
        for hit in dom.findall(".//tr[@class='hit']"):
            # change tr to div
            hit.tag = 'div'
            # change td to span
            for cell in hit.findall('td'):
                cell.tag = 'span'

            # TODO: can we put etree in the session?
            results.append(hit)

    except Exception as e:
        logging.error("GetSearchResults: Error parsing search result: %s", e)

    # store results in docinfo; the cache key is written together with
    # the results so that both always match
    docinfo['cachedQuery'] = cacheKey
    docinfo['resultSize'] = len(results)
    docinfo['results'] = results

    return docinfo
650
651
def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """Return a single page from the list of search results.

    mode, query -- passed on to getSearchResults.
    pn -- number of the result page (1-based); only used when start is
        not given.
    start -- 1-based index of the first result on the page.
    size -- results per page; defaults to pageinfo['resultPageSize']
        or 10.

    Returns the serialized div with the hits of the page, or an error
    message string when there are no results. The links inside the hits
    are rewritten to viewer links via self.getLink().
    """
    logging.debug("getResultsPage mode=%s, pn=%s", mode, pn)
    # get (cached) result
    self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)

    resultxml = docinfo.get('results', None)
    if not resultxml:
        logging.error("getResultPage: unable to find results")
        return "Error: no result!"

    if size is None:
        size = (pageinfo or {}).get('resultPageSize', 10)

    if start is None:
        # start is 1-based: page 1 begins with result 1
        start = ((pn or 1) - 1) * size + 1

    # paginate
    first = max(start - 1, 0)
    tocdivs = resultxml[first:first + size]

    toc = ET.Element('div', attrib={'class': 'queryResultPage'})
    for div in tocdivs:
        # check all a-tags
        for link in div.findall(".//a"):
            href = link.get('href')
            if not href:
                continue

            # assume all links go to pages
            linkUrl = urlparse.urlparse(href)
            linkParams = urlparse.parse_qs(linkUrl.query)
            # take some parameters (make sure it works even if the link was already parsed)
            params = {'pn': linkParams.get('page', linkParams.get('pn', None)),
                      'highlightQuery': linkParams.get('highlightQuery', None),
                      'highlightElement': linkParams.get('highlightElem', linkParams.get('highlightElement', None)),
                      'highlightElementPos': linkParams.get('highlightElemPos', linkParams.get('highlightElementPos', None))
                      }
            if not params['pn']:
                logging.warning("getResultsPage: link has no page: %s", href)

            url = self.getLink(params=params)
            link.set('href', url)

        toc.append(div)

    return serialize(toc)
702
703
def getToc(self, mode='text', docinfo=None):
    """Return the table of contents list for mode from docinfo.

    mode -- 'text' selects the 'toc' list, any other value is used as
        list name directly (e.g. 'figures').
    docinfo -- document info dict; the list is loaded with getTextInfo
        when 'full_<list name>' is not in it yet.

    Returns an empty list when the list could not be loaded.
    """
    logging.debug("getToc mode=%s", mode)
    queryType = 'toc' if mode == 'text' else mode
    key = 'full_%s' % queryType

    if key not in docinfo:
        # get new toc
        docinfo = self.getTextInfo(queryType, docinfo)

    return docinfo.get(key, [])
717
718
def getTocPage(self, mode='text', pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """Return a single page from the table of contents as an HTML string.

    mode -- list to show, see getToc.
    pn -- number of the toc page (1-based); only used when start is not
        given.
    start -- 1-based index of the first entry on the page.
    size -- entries per page; defaults to pageinfo['tocPageSize'] or 30.

    Returns an error message string when there is no table of contents.
    Entry texts are HTML-escaped before they are put into the markup.
    """
    # local import: only needed here
    from xml.sax.saxutils import escape

    def esc(value):
        # format like '%s' would and escape the markup characters
        return escape("%s" % (value,))

    logging.debug("getTocPage mode=%s, pn=%s start=%s size=%s", mode, repr(pn), repr(start), repr(size))
    fulltoc = self.getToc(mode=mode, docinfo=docinfo)
    if len(fulltoc) < 1:
        logging.error("getTocPage: unable to find toc!")
        return "Error: no table of contents!"

    if size is None:
        size = (pageinfo or {}).get('tocPageSize', 30)

    if start is None:
        # start is 1-based: page 1 begins with entry 1
        start = ((pn or 1) - 1) * size + 1

    # paginate
    first = max(start - 1, 0)
    tocs = fulltoc[first:first + size]
    label = {'figures': 'Figure', 'notes': 'Note', 'handwritten': 'Handwritten note'}.get(mode, 'Item')
    parts = ['<div>']
    for toc in tocs:
        pageurl = self.getLink('pn', toc['pn'])
        parts.append('<div class="tocline">')
        content = toc['content']
        lvs = toc['level-string']
        if content:
            parts.append('<div class="toc name">[%s] %s</div>' % (esc(lvs), esc(content)))
        elif lvs:
            parts.append('<div class="toc name">[%s %s]</div>' % (label, esc(lvs)))
        else:
            parts.append('<div class="toc name">[%s]</div>' % (label))

        if toc.get('no', None):
            parts.append('<div class="toc page"><a href="%s">Page: %s (%s)</a></div>' % (pageurl, toc['pn'], esc(toc['no'])))
        else:
            parts.append('<div class="toc page"><a href="%s">Page: %s</a></div>' % (pageurl, toc['pn']))

        parts.append('</div>\n')

    parts.append('</div>\n')

    return ''.join(parts)
761
762
def manage_changeMpiwgXmlTextServer(self, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40, repositoryType=None, RESPONSE=None):
    """Change settings.

    title, serverUrl, timeout -- always overwrite the stored values.
    repositoryType -- only stored when it is not empty.
    RESPONSE -- when given, is redirected to 'manage_main'.
    """
    # NOTE(review): the default serverUrl differs from the one in
    # __init__ (".../mpdl/interface/" vs ".../mpiwg-mpdl-cms-web/") --
    # confirm which one is intended
    self.title = title
    self.timeout = timeout
    self.serverUrl = serverUrl
    if repositoryType:
        self.repositoryType = repositoryType

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
772
773	# management methods
def manage_addMpiwgXmlTextServerForm(self):
    """Form for adding"""
    # bind the page template to self before rendering it
    pt = PageTemplateFile("zpt/manage_addMpiwgXmlTextServer", globals()).__of__(self)
    return pt()
778
def manage_addMpiwgXmlTextServer(self, id, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40, RESPONSE=None):
    """Add a MpiwgXmlTextServer.

    Creates a new MpiwgXmlTextServer from id, title, serverUrl and
    timeout and stores it under id in self.Destination(). When RESPONSE
    is given it is redirected to 'manage_main'.
    """
    # NOTE(review): the default serverUrl differs from the default of
    # MpiwgXmlTextServer.__init__ (".../mpiwg-mpdl-cms-web/") -- confirm
    newObj = MpiwgXmlTextServer(id=id, title=title, serverUrl=serverUrl, timeout=timeout)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
785
786

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: documentViewer/MpiwgXmlTextServer.py

Download in other formats: