Mercurial > hg > documentViewer
annotate MpdlXmlTextServer.py @ 503:030251fe9dbc elementtree
more cleanup.
made viewType into viewLayer and viewType=xml into viewMode=xml.
author | casties |
---|---|
date | Thu, 16 Feb 2012 14:30:01 +0100 |
parents | 29c6d09a506c |
children | 67014399894d |
rev | line source |
---|---|
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
1 from OFS.SimpleItem import SimpleItem |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
455 | 3 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
4 from Ft.Xml import EMPTY_NAMESPACE, Parse |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
5 from Ft.Xml.Domlette import NonvalidatingReader |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
6 import Ft.Xml.Domlette |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
7 import cStringIO |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
8 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
9 import xml.etree.ElementTree as ET |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
10 |
455 | 11 import re |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
12 import logging |
134 | 13 import urllib |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
14 |
458 | 15 from SrvTxtUtils import getInt, getText, getHttpData |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
16 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def serialize(node):
    """Return an XML snippet of node without the leading XML declaration.

    node is an ElementTree element. It is serialized with
    ET.tostring(node, 'UTF-8'); if the result starts with an
    '<?xml ...?>' declaration, the declaration and any whitespace after
    it are removed. The return value has the same type that ET.tostring
    produced (a byte string).
    """
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # Skip the two characters of '?>' and whatever whitespace
            # follows, instead of assuming exactly one newline character.
            return s[end + 2:].lstrip()

    return s
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
26 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
27 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getTextFromNode(node):
    """Return the concatenated direct text content of node.

    Only text that is an immediate child of node is returned; text inside
    child elements is not included.

    node may be
    - None: returns "",
    - a DOM-style node (anything with a childNodes attribute, e.g. 4Suite
      or xml.dom nodes): the data of all TEXT_NODE children is joined,
    - an ElementTree element: node.text plus the tail of every child,
      which is the same set of text in ElementTree's data model.
    """
    if node is None:
        return ""

    if hasattr(node, 'childNodes'):
        # DOM style (4Suite, minidom)
        parts = [n.data for n in node.childNodes
                 if n.nodeType == node.TEXT_NODE]
    else:
        # ElementTree style: text/tail may be None when empty
        parts = [node.text or ""]
        parts.extend(child.tail or "" for child in node)

    return "".join(parts)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
41 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def serializeNode(node, encoding="utf-8"):
    """Return a string containing node serialized as XML.

    node is a 4Suite Domlette node; it is printed with
    Ft.Xml.Domlette.Print into an in-memory stream using the given
    encoding, and the stream contents are returned.
    """
    #s = ET.tostring(node)

    # 4Suite:
    stream = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
        s = stream.getvalue()
    finally:
        # release the buffer even when Print raises
        stream.close()

    return s
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
53 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
54 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
55 class MpdlXmlTextServer(SimpleItem): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
56 """TextServer implementation for MPDL-XML eXist server""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
57 meta_type="MPDL-XML TextServer" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
58 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
59 manage_options=( |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
60 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'}, |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
61 )+SimpleItem.manage_options |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
62 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
63 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
64 |
def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Set up id, title, request timeout and the text server base URL.

    When serverName is given it takes precedence over serverUrl and the
    base URL becomes "http://<serverName>/mpdl/interface/"; otherwise
    serverUrl is stored unchanged.
    """
    if serverName is not None:
        # build the interface URL from the bare host name
        serverUrl = "http://%s/mpdl/interface/" % serverName

    self.id = id
    self.title = title
    self.timeout = timeout
    self.serverUrl = serverUrl
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
74 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getHttpData(self, url, data=None):
    """Return the result of the HTTP request for url and data.

    Delegates to the module-level getHttpData helper (imported from
    SrvTxtUtils), passing this object's timeout.
    """
    timeout = self.timeout
    return getHttpData(url, data, timeout=timeout)
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
78 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getServerData(self, method, data=None):
    """Return the text server's response for method and data.

    method is the name of the server script (e.g. "xpath.xql"); it is
    appended to self.serverUrl. data is the request parameter string
    and is passed through unchanged.
    """
    url = self.serverUrl + method
    # go through self.getHttpData so the timeout handling lives in one place
    return self.getHttpData(url, data)
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
83 |
455 | 84 # WTF: what does this really do? can it be integrated in getPage? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getSearch(self, pageinfo=None, docinfo=None):
    """Run a search on the text server and return the result div as XML.

    The query parameters are read from pageinfo and the document location
    from docinfo. The result of "doc-query.xql" is parsed, the result div
    for the given queryType is selected, and the href attributes of its
    links are rewritten so that they point at this object
    (self.absolute_url()) instead of the backend scripts.

    Returns the serialized result div, or the string "no text here" when
    queryType is not handled or the expected div is missing.
    """
    logging.debug("getSearch()")
    docpath = docinfo['textURLPath']
    url = docinfo['url']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo.get('searchPN',1)
    sn = pageinfo.get('sn',None) #TODO: is this s now?
    highlightQuery = pageinfo['highlightQuery']
    query = pageinfo['query']
    queryType = pageinfo['queryType']
    viewMode = pageinfo['viewMode']
    tocMode = pageinfo['tocMode']
    characterNormalization = pageinfo['characterNormalization']
    tocPN = pageinfo['tocPN']
    selfurl = self.absolute_url()
    data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
    pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
    pagedom = Parse(pagexml)

    # replacements for 'mode=text' in result links; they do not depend on
    # the individual link, so they are built once
    fragmentMode = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)
    indexMode = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)

    def firstDiv(cssClass):
        """returns the first div with the given class or None"""
        divs = pagedom.xpath("//div[@class='%s']"%cssClass)
        if len(divs)>0:
            return divs[0]
        return None

    def linksWithHref(pagenode):
        """yields (link, hrefNode, href) for every link that has a href"""
        # NOTE(review): "//a" is an absolute path, so this presumably
        # selects the links of the whole document, not only those below
        # pagenode -- kept as in the original, confirm before narrowing.
        for link in pagenode.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if hrefNode:
                yield link, hrefNode, hrefNode.nodeValue

    def rewriteFragmentLink(hrefNode, href):
        """points a page-fragment.xql link at this object"""
        newhref = href.replace('mode=text', fragmentMode)
        hrefNode.nodeValue = newhref.replace('page-fragment.xql','%s'%selfurl)

    def makePopupLink(link, hrefNode, href, backend, template):
        """points a backend link at a local template opened in a popup"""
        hrefNode.nodeValue = href.replace(backend,'%s/template/%s'%(selfurl, template))
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
        pagenode = firstDiv('queryResultPage')
        if pagenode is not None:
            for link, hrefNode, href in linksWithHref(pagenode):
                if href.startswith('page-fragment.xql'):
                    rewriteFragmentLink(hrefNode, href)
            #logging.debug("PUREXML :%s"%(serializeNode(pagenode)))
            return serializeNode(pagenode)

    if queryType == "fulltextMorph":
        pagenode = firstDiv('queryResult')
        if pagenode is not None:
            for link, hrefNode, href in linksWithHref(pagenode):
                if href.startswith('page-fragment.xql'):
                    rewriteFragmentLink(hrefNode, href)
                if href.startswith('../lt/lemma.xql'):
                    makePopupLink(link, hrefNode, href, '../lt/lemma.xql', 'head_main_query')
            return serializeNode(pagenode)

    if queryType in ("ftIndex", "ftIndexMorph"):
        pagenode = firstDiv('queryResultPage')
        if pagenode is not None:
            for link, hrefNode, href in linksWithHref(pagenode):
                hrefNode.nodeValue = href.replace('mode=text', indexMode)
                # the popup rewrites start again from the original href,
                # as the original code did
                if href.startswith('../lt/lex.xql'):
                    makePopupLink(link, hrefNode, href, '../lt/lex.xql', 'head_main_lex')
                if href.startswith('../lt/lemma.xql'):
                    makePopupLink(link, hrefNode, href, '../lt/lemma.xql', 'head_main_lemma')
            return serializeNode(pagenode)

    return "no text here"
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
176 |
def getGisPlaces(self, docinfo=None, pageinfo=None):
    """Return the ids of all GIS places on the current page.

    Queries "xpath.xql" on the text server for '//place' on page
    pageinfo['current'] of the document docinfo['textURLPath'] and
    collects the "id" attribute of every result/resultPage/place element.

    Returns the ids joined with "," (an empty string when there are
    none), or None when docinfo has no textURLPath.
    """
    xpath = '//place'
    docpath = docinfo.get('textURLPath',None)
    if not docpath:
        return None

    pn = pageinfo['current']
    text = self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docpath,xpath,pn))
    dom = ET.fromstring(text)
    places = dom.findall(".//result/resultPage/place")
    # places without an id attribute are skipped; they would break the join
    ids = [place.get("id") for place in places if place.get("id") is not None]
    #logging.debug("getGisPlaces :%s"%(myList))
    return ",".join(ids)
197 | |
def getAllGisPlaces (self, docinfo=None, pageinfo=None):
    """Return the ids of all GIS places in the whole document.

    Queries "xpath.xql" on the text server for '//echo:place' in the
    document docinfo['textURLPath'] and collects the "id" attribute of
    every result/resultPage/place element. pageinfo is not used.

    Returns the ids joined with "," (an empty string when there are
    none), or None when docinfo has no textURLPath, like getGisPlaces.
    """
    xpath = '//echo:place'
    docpath = docinfo.get('textURLPath',None)
    if not docpath:
        return None

    text = self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docpath,xpath))
    dom = ET.fromstring(text)
    places = dom.findall(".//result/resultPage/place")
    # places without an id attribute are skipped; they would break the join
    ids = [place.get("id") for place in places if place.get("id") is not None]
    #logging.debug("getALLGisPlaces :%s"%(myList))
    return ",".join(ids)
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
214 |
def processPageInfo(self, dom, docinfo, pageinfo):
    """Copy the page meta information found in dom into docinfo and pageinfo.

    The first div child of dom must have class "pageMeta"; otherwise an
    error is logged and nothing is changed. Each child of that div is
    dispatched on its class attribute:
    - pageNumberOrig, pageNumberOrigNorm, pageHeaderTitle: the text is
      stored in pageinfo under the same key,
    - countFigureEntries, countTocEntries, countPlaces: the integer value
      is stored in docinfo as numFigureEntries, numTocEntries, numPlaces,
    - countPages: a positive value is stored as docinfo['numTextPages'];
      if docinfo has no page count yet it also becomes
      docinfo['numPages'] and pageinfo['numgroups'] is recomputed from
      pageinfo['groupsize'].
    """
    # classes whose text goes to pageinfo under the same name
    textKeys = ('pageNumberOrig', 'pageNumberOrigNorm', 'pageHeaderTitle')
    # classes holding counts -> key in docinfo
    countKeys = {
        'countFigureEntries': 'numFigureEntries',
        # WTF: s1 = int(s)/30+1
        'countTocEntries': 'numTocEntries',
        'countPlaces': 'numPlaces',
    }

    # assume first second level div is pageMeta
    metaDiv = dom.find("div")
    if metaDiv is None or metaDiv.get('class', '') != 'pageMeta':
        logging.error("processPageInfo: pageMeta div not found!")
        return

    for entry in metaDiv:
        cls = entry.get('class')

        if cls in textKeys:
            pageinfo[cls] = entry.text

        elif cls in countKeys:
            docinfo[countKeys[cls]] = getInt(entry.text)

        elif cls == 'countPages':
            pageCount = getInt(entry.text)
            if pageCount > 0:
                docinfo['numTextPages'] = pageCount
                if docinfo.get('numPages', 0) == 0:
                    # seems to be text-only - update page count
                    docinfo['numPages'] = pageCount
                    #pageinfo['end'] = min(pageinfo['end'], np)
                    groups = int(pageCount / pageinfo['groupsize'])
                    pageinfo['numgroups'] = groups
                    if pageCount % pageinfo['groupsize'] > 0:
                        # partially filled last group
                        pageinfo['numgroups'] += 1

    #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo))
    return
267 | |
388 | 268 |
471 | 269 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
270 """returns single page from fulltext""" |
455 | 271 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) |
478 | 272 # check for cached text -- but ideally this shouldn't be called twice |
455 | 273 if pageinfo.has_key('textPage'): |
274 logging.debug("getTextPage: using cached text") | |
275 return pageinfo['textPage'] | |
276 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
277 docpath = docinfo['textURLPath'] |
455 | 278 # just checking |
279 if pageinfo['current'] != pn: | |
280 logging.warning("getTextPage: current!=pn!") | |
281 | |
282 # stuff for constructing full urls | |
283 url = docinfo['url'] | |
284 urlmode = docinfo['mode'] | |
285 sn = pageinfo.get('sn', None) | |
286 highlightQuery = pageinfo.get('highlightQuery', None) | |
287 tocMode = pageinfo.get('tocMode', None) | |
288 tocPN = pageinfo.get('tocPN',None) | |
289 characterNormalization = pageinfo.get('characterNormalization', None) | |
478 | 290 |
291 selfurl = docinfo['viewerUrl'] | |
455 | 292 |
473 | 293 if mode == "dict" or mode == "text_dict": |
294 # dict is called textPollux in the backend | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
295 textmode = "textPollux" |
471 | 296 elif not mode: |
297 # default is text | |
475 | 298 mode = "text" |
471 | 299 textmode = "text" |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
300 else: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
301 textmode = mode |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
302 |
373 | 303 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) |
455 | 304 if highlightQuery: |
376 | 305 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
306 |
455 | 307 # fetch the page |
185 | 308 pagexml = self.getServerData("page-fragment.xql",textParam) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
309 dom = ET.fromstring(pagexml) |
455 | 310 # extract additional info |
311 self.processPageInfo(dom, docinfo, pageinfo) | |
312 # page content is in <div class="pageContent"> | |
313 pagediv = None | |
314 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent'] | |
469 | 315 # so we look at the second level divs |
316 alldivs = dom.findall("div") | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
317 for div in alldivs: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
318 dc = div.get('class') |
455 | 319 # page content div |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
320 if dc == 'pageContent': |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
321 pagediv = div |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
322 break |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
323 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
324 # plain text mode |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
325 if mode == "text": |
478 | 326 # get full url assuming documentViewer is parent |
327 selfurl = self.getLink() | |
473 | 328 if pagediv is not None: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
329 links = pagediv.findall(".//a") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
330 for l in links: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
331 href = l.get('href') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
332 if href and href.startswith('#note-'): |
478 | 333 href = href.replace('#note-',"%s#note-"%selfurl) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
334 l.set('href', href) |
455 | 335 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
336 return serialize(pagediv) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
337 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
338 # text-with-links mode |
475 | 339 elif mode == "dict": |
473 | 340 if pagediv is not None: |
478 | 341 viewerurl = docinfo['viewerUrl'] |
342 selfurl = self.getLink() | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
343 # check all a-tags |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
344 links = pagediv.findall(".//a") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
345 for l in links: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
346 href = l.get('href') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
347 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
348 if href: |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
349 # is link with href |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
350 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): |
478 | 351 # is dictionary link - change href (keeping parameters) |
352 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl)) | |
353 # add target to open new page | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
354 l.set('target', '_blank') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
355 |
478 | 356 # TODO: is this needed? |
357 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
358 selfurl = self.absolute_url() |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
359 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
360 l.set('target', '_blank') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
361 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
362 l.set('ondblclick', 'popupWin.focus();') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
363 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
364 if href.startswith('#note-'): |
478 | 365 # note link |
366 l.set('href', href.replace('#note-',"%s#note-"%selfurl)) | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
367 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
368 return serialize(pagediv) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
369 |
455 | 370 # xml mode |
371 elif mode == "xml": | |
473 | 372 if pagediv is not None: |
455 | 373 return serialize(pagediv) |
374 | |
375 # pureXml mode | |
376 elif mode == "pureXml": | |
473 | 377 if pagediv is not None: |
455 | 378 return serialize(pagediv) |
379 | |
380 # gis mode | |
381 elif mode == "gis": | |
382 name = docinfo['name'] | |
473 | 383 if pagediv is not None: |
455 | 384 # check all a-tags |
385 links = pagediv.findall(".//a") | |
386 for l in links: | |
387 href = l.get('href') | |
388 if href: | |
389 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): | |
390 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) | |
391 l.set('target', '_blank') | |
392 | |
393 return serialize(pagediv) | |
394 | |
501 | 395 return None |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
396 |
455 | 397 # TODO: should be getWordInfo |
def getWordInfo(self, word='', language='', display=''):
    """Fetch information about a word (e.g. dictionary entries) from the text server.

    Calls lt/wordInfo.xql through self.getServerData with output=html and
    returns whatever that call returns.

    word     -- the word to look up (URL-quoted before sending)
    language -- language parameter, inserted into the query as given
    display  -- display parameter (URL-quoted before sending)
    """
    quoted = (language, urllib.quote(word), urllib.quote(display))
    params = "language=%s&word=%s&display=%s&output=html" % quoted
    return self.getServerData("lt/wordInfo.xql", params)
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
402 |
455 | 403 # WTF: what does this do? |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getLemma(self, lemma=None, language=None):
    """Query lt/lemma.xql on the text server for the given lemma.

    lemma    -- lemma to look up (URL-quoted before sending)
    language -- converted with str() and inserted into the query as is

    Returns the result of self.getServerData (requested with output=html).
    """
    params = "language=%s&lemma=%s&output=html" % (str(language), urllib.quote(lemma))
    return self.getServerData("lt/lemma.xql", params)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
408 |
455 | 409 # WTF: what does this do? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getLemmaQuery(self, query=None, language=None):
    """Query lt/lemma.xql on the text server using a query string.

    query    -- query to send (URL-quoted before sending)
    language -- converted with str() and inserted into the query as is

    Returns the result of self.getServerData (requested with output=html).
    """
    params = "language=%s&query=%s&output=html" % (str(language), urllib.quote(query))
    return self.getServerData("lt/lemma.xql", params)
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
414 |
455 | 415 # WTF: what does this do? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getLex(self, query=None, language=None):
    # Sends the URL-quoted query and str(language) to lt/lex.xql (with an
    # empty "document" parameter) and returns the getServerData result.
    # NOTE(review): this method deliberately keeps a comment instead of a
    # docstring, as in the original; Zope 2 is understood to refuse to
    # publish docstring-less methods over the web -- confirm the intent
    # before adding one.
    params = "document=&language=%s&query=%s" % (str(language), urllib.quote(query))
    return self.getServerData("lt/lex.xql", params)
455 | 420 |
421 # WTF: what does this do? | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
    # Runs a full-text query through doc-query.xql and returns
    # (number of hits // 10) + 1.
    #
    # docinfo  -- dict; 'textURLPath' is read
    # pageinfo -- dict; 'queryPageSize', 'searchPN', 'query' and 'queryType'
    #             are read
    # query, queryType, pn -- accepted for compatibility; as before, the
    #             values from pageinfo are used instead of these arguments.
    #
    # NOTE(review): kept as a comment rather than a docstring, like the
    # original; Zope 2 is understood to refuse to publish docstring-less
    # methods over the web -- confirm the intent before adding one.
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo['searchPN']
    query = pageinfo['query']
    queryType = pageinfo['queryType']

    params = "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)
    pagexml = self.getServerData("doc-query.xql", params)

    # parse with ElementTree like getToc does for the same backend script.
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultHits'],
    # so all divs (including the root element) are checked by hand.
    dom = ET.fromstring(pagexml)
    hits = None
    for div in [dom] + dom.findall(".//div"):
        if div.get('class') == 'queryResultHits':
            hits = getInt(div.text)
            break

    if hits is None:
        # previously this case raised an IndexError
        logging.warning("getQuery: no queryResultHits in result!")
        hits = 0

    # NOTE(review): an exact multiple of 10 hits yields one extra page
    # (10 hits -> 2); formula kept unchanged from the original.
    return hits // 10 + 1
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
438 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getToc(self, mode="text", docinfo=None):
    """loads table of contents and stores XML in docinfo

    mode    -- "none" returns docinfo untouched; "text" is sent to the
               backend as queryType "toc"; any other value is used as the
               queryType directly
    docinfo -- dict; 'textURLPath' is read, 'tocSize_<mode>' and
               'tocXML_<mode>' are written

    Returns docinfo. If 'tocSize_<mode>' is already present the cached
    values are kept and no request is made.
    """
    logging.debug("getToc mode=%s"%mode)
    if mode == "none":
        return docinfo

    if 'tocSize_%s'%mode in docinfo:
        # cached toc
        return docinfo

    docpath = docinfo['textURLPath']
    # we need to set a result set size
    pagesize = 1000
    pn = 1
    queryType = "toc" if mode == "text" else mode

    # fetch full toc
    pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
    dom = ET.fromstring(pagexml)
    # page content is in <div class="queryResultPage">
    pagediv = None
    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
    for div in dom.findall("div"):
        dc = div.get('class')
        if dc == 'queryResultPage':
            # page content div
            pagediv = div

        elif dc == 'queryResultHits':
            # number of entries in toc
            docinfo['tocSize_%s'%mode] = getInt(div.text)

    # compare with None: an Element without children is false in a boolean
    # context, so "if pagediv:" skipped an existing but empty result page
    if pagediv is not None:
        # store XML in docinfo
        docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')

    return docinfo
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
481 |
482
7ca8ac7db06e
more new template stuff. more batching methods in documentViewer.
casties
parents:
478
diff
changeset
|
def getTocPage(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None):
    """returns single page from the table of contents

    mode     -- table-of-contents mode; selects the 'tocXML_<mode>' entry
                in docinfo
    pn       -- page number, used to derive start when start is None
                (None is treated as page 1)
    start    -- 1-based number of the first entry to show
    size     -- entries per page; defaults to pageinfo['tocPageSize'] or 30
    pageinfo -- dict; only read when size is None
    docinfo  -- dict holding the cached TOC; filled via self.getToc when
                'tocXML_<mode>' is missing

    Returns the serialized <div class="queryResultPage"> for the requested
    page, or an error message string when there is no table of contents.
    """
    logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))
    tockey = 'tocXML_%s'%mode

    # check for cached TOC
    if tockey not in docinfo:
        self.getToc(mode=mode, docinfo=docinfo)

    tocxml = docinfo.get(tockey, None)
    if not tocxml:
        logging.error("getTocPage: unable to find tocXML")
        return "Error: no table of contents!"

    if size is None:
        size = pageinfo.get('tocPageSize', 30)

    if start is None:
        if pn is None:
            pn = 1
        # start is 1-based (see "first" below), so page 1 begins at entry 1.
        # The former (pn - 1) * size gave start=0 for page 1, which made
        # "first" negative and dropped all but the last two divs.
        start = (pn - 1) * size + 1

    fulltoc = ET.fromstring(tocxml)
    if len(fulltoc) == 0:
        return "ERROR: no table of contents!"

    # paginate: every entry consists of two consecutive divs
    first = max(start - 1, 0) * 2
    count = size * 2
    del fulltoc[:first]
    del fulltoc[count:]

    # check all a-tags
    for l in fulltoc.findall(".//a"):
        href = l.get('href')
        if not href:
            continue

        # take pn from href
        m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
        if m is not None:
            # and create new url (assuming parent is documentViewer)
            l.set('href', self.getLink('pn', m.group(1)))
        else:
            logging.warning("getTocPage: Problem with link=%s"%href)

    # fix two-divs-per-row with containing div
    newtoc = ET.Element('div', {'class':'queryResultPage'})
    tocdivs = list(fulltoc)
    for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]):
        e = ET.Element('div',{'class':'tocline'})
        e.append(d1)
        e.append(d2)
        newtoc.append(e)

    return serialize(newtoc)
455 | 540 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
541 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Store new settings (title, timeout, serverUrl) on this object.

    When RESPONSE is given, it is redirected to 'manage_main' afterwards.
    """
    settings = (('title', title), ('timeout', timeout), ('serverUrl', serverUrl))
    for name, value in settings:
        setattr(self, name, value)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
549 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
550 # management methods |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def manage_addMpdlXmlTextServerForm(self):
    """Render the form for adding a MpdlXmlTextServer."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # wrap the template in the context of self before rendering it
    form = template.__of__(self)
    return form()
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
555 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create a MpdlXmlTextServer and add it under id to self.Destination().

    When RESPONSE is given, it is redirected to 'manage_main' afterwards.
    """
    # alternative serverUrl default kept from the source (disabled):
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    container = self.Destination()
    container._setObject(id, server)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
563 | |
564 |