Mercurial > hg > documentViewer
annotate MpdlXmlTextServer.py @ 456:b27a7d2f06ff elementtree
even toc pagination works
author | casties |
---|---|
date | Fri, 15 Jul 2011 21:45:52 +0200 |
parents | 0a53fea83df7 |
children | 48b135b089c8 |
rev | line source |
---|---|
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
1 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
2 from OFS.SimpleItem import SimpleItem |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
455 | 4 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
5 from Ft.Xml import EMPTY_NAMESPACE, Parse |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
6 from Ft.Xml.Domlette import NonvalidatingReader |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
7 import Ft.Xml.Domlette |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
8 import cStringIO |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
9 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
10 import xml.etree.ElementTree as ET |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
11 |
455 | 12 import re |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
13 import logging |
134 | 14 import urllib |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
15 import documentViewer |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
16 #from documentViewer import getTextFromNode, serializeNode |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
17 |
455 | 18 def intOr0(s, default=0): |
19 """convert s to int or return default""" | |
20 try: | |
21 return int(s) | |
22 except: | |
23 return default | |
24 | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
25 def getText(node): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
26 """get the cdata content of a node""" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
27 if node is None: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
28 return "" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
29 # ET: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
30 text = node.text or "" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
31 for e in node: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
32 text += gettext(e) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
33 if e.tail: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
34 text += e.tail |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
35 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
36 return text |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
37 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
38 def serialize(node): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
39 """returns a string containing an XML snippet of node""" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
40 s = ET.tostring(node, 'UTF-8') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
41 # snip off XML declaration |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
42 if s.startswith('<?xml'): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
43 i = s.find('?>') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
44 return s[i+3:] |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
45 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
46 return s |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
47 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
48 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
49 def getTextFromNode(node): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
50 """get the cdata content of a node""" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
51 if node is None: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
52 return "" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
53 # ET: |
455 | 54 # text = node.text or "" |
55 # for e in node: | |
56 # text += gettext(e) | |
57 # if e.tail: | |
58 # text += e.tail | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
59 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
60 # 4Suite: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
61 nodelist=node.childNodes |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
62 text = "" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
63 for n in nodelist: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
64 if n.nodeType == node.TEXT_NODE: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
65 text = text + n.data |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
66 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
67 return text |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
68 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
69 def serializeNode(node, encoding="utf-8"): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
70 """returns a string containing node as XML""" |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
71 #s = ET.tostring(node) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
72 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
73 # 4Suite: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
74 stream = cStringIO.StringIO() |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
75 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
76 s = stream.getvalue() |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
77 stream.close() |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
78 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
79 return s |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
80 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
81 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
82 class MpdlXmlTextServer(SimpleItem): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
83 """TextServer implementation for MPDL-XML eXist server""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
84 meta_type="MPDL-XML TextServer" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
85 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
86 manage_options=( |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
87 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'}, |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
88 )+SimpleItem.manage_options |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
89 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
90 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
91 |
455 | 92 def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
93 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
94 """constructor""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
95 self.id=id |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
96 self.title=title |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
97 self.timeout = timeout |
132 | 98 if serverName is None: |
99 self.serverUrl = serverUrl | |
100 else: | |
101 self.serverUrl = "http://%s/mpdl/interface/"%serverName | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
102 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
103 def getHttpData(self, url, data=None): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
104 """returns result from url+data HTTP request""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
105 return documentViewer.getHttpData(url,data,timeout=self.timeout) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
106 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
107 def getServerData(self, method, data=None): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
108 """returns result from text server for method+data""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
109 url = self.serverUrl+method |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
110 return documentViewer.getHttpData(url,data,timeout=self.timeout) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
111 |
455 | 112 # WTF: what does this really do? can it be integrated in getPage? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
113 def getSearch(self, pageinfo=None, docinfo=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
114 """get search list""" |
455 | 115 logging.debug("getSearch()") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
116 docpath = docinfo['textURLPath'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
117 url = docinfo['url'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
118 pagesize = pageinfo['queryPageSize'] |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
119 pn = pageinfo.get('searchPN',1) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
120 sn = pageinfo['sn'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
121 highlightQuery = pageinfo['highlightQuery'] |
181 | 122 query =pageinfo['query'] |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
123 queryType =pageinfo['queryType'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
124 viewMode= pageinfo['viewMode'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
125 tocMode = pageinfo['tocMode'] |
169 | 126 characterNormalization = pageinfo['characterNormalization'] |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
127 #optionToggle = pageinfo['optionToggle'] |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
128 tocPN = pageinfo['tocPN'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
129 selfurl = self.absolute_url() |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
130 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery))) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
131 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
132 pagedom = Parse(pagexml) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
133 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
134 """ |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
135 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
136 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
137 if len(pagedivs)>0: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
138 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
139 s = getTextFromNode(pagedivs[0]) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
140 s1 = int(s)/10+1 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
141 try: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
142 docinfo['queryResultHits'] = int(s1) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
143 logging.debug("SEARCH ENTRIES: %s"%(s1)) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
144 except: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
145 docinfo['queryResultHits'] = 0 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
146 """ |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
147 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
148 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
149 if len(pagedivs)>0: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
150 pagenode=pagedivs[0] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
151 links=pagenode.xpath("//a") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
152 for l in links: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
153 hrefNode = l.getAttributeNodeNS(None, u"href") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
154 if hrefNode: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
155 href = hrefNode.nodeValue |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
156 if href.startswith('page-fragment.xql'): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
157 selfurl = self.absolute_url() |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
158 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
159 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
160 #logging.debug("PUREXML :%s"%(serializeNode(pagenode))) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
161 return serializeNode(pagenode) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
162 if (queryType=="fulltextMorph"): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
163 pagedivs = pagedom.xpath("//div[@class='queryResult']") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
164 if len(pagedivs)>0: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
165 pagenode=pagedivs[0] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
166 links=pagenode.xpath("//a") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
167 for l in links: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
168 hrefNode = l.getAttributeNodeNS(None, u"href") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
169 if hrefNode: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
170 href = hrefNode.nodeValue |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
171 if href.startswith('page-fragment.xql'): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
172 selfurl = self.absolute_url() |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
173 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
174 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
175 if href.startswith('../lt/lemma.xql'): |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
176 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
177 l.setAttributeNS(None, 'target', '_blank') |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
178 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
179 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
180 pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
181 return serializeNode(pagenode) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
182 if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
183 pagedivs= pagedom.xpath("//div[@class='queryResultPage']") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
184 if len(pagedivs)>0: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
185 pagenode=pagedivs[0] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
186 links=pagenode.xpath("//a") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
187 for l in links: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
188 hrefNode = l.getAttributeNodeNS(None, u"href") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
189 if hrefNode: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
190 href = hrefNode.nodeValue |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
191 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
192 if href.startswith('../lt/lex.xql'): |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
193 hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
194 l.setAttributeNS(None, 'target', '_blank') |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
195 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
196 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
197 if href.startswith('../lt/lemma.xql'): |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
198 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
199 l.setAttributeNS(None, 'target', '_blank') |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
200 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
201 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
202 return serializeNode(pagenode) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
203 return "no text here" |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
204 |
253 | 205 def getGisPlaces(self, docinfo=None, pageinfo=None): |
207 | 206 """ Show all Gis Places of whole Page""" |
264 | 207 xpath='//place' |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
208 docpath = docinfo.get('textURLPath',None) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
209 if not docpath: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
210 return None |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
211 |
253 | 212 url = docinfo['url'] |
213 selfurl = self.absolute_url() | |
257 | 214 pn = pageinfo['current'] |
291 | 215 hrefList=[] |
306 | 216 myList= "" |
264 | 217 text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn)) |
455 | 218 dom = ET.fromstring(text) |
219 result = dom.findall(".//result/resultPage/place") | |
236 | 220 for l in result: |
455 | 221 href = l.get("id") |
292 | 222 hrefList.append(href) |
455 | 223 # WTF: what does this do? |
309 | 224 myList = ",".join(hrefList) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
225 #logging.debug("getGisPlaces :%s"%(myList)) |
307 | 226 return myList |
227 | |
228 def getAllGisPlaces (self, docinfo=None, pageinfo=None): | |
229 """Show all Gis Places of whole Book """ | |
230 xpath ='//echo:place' | |
231 docpath =docinfo['textURLPath'] | |
232 url = docinfo['url'] | |
233 selfurl =self.absolute_url() | |
234 pn =pageinfo['current'] | |
235 hrefList=[] | |
236 myList="" | |
237 text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath)) | |
455 | 238 dom = ET.fromstring(text) |
239 result = dom.findall(".//result/resultPage/place") | |
395 | 240 |
307 | 241 for l in result: |
455 | 242 href = l.get("id") |
307 | 243 hrefList.append(href) |
455 | 244 # WTF: what does this do? |
300 | 245 myList = ",".join(hrefList) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
246 #logging.debug("getALLGisPlaces :%s"%(myList)) |
309 | 247 return myList |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
248 |
455 | 249 def processPageInfo(self, dom, docinfo, pageinfo): |
250 """processes page info divs from dom and stores in docinfo and pageinfo""" | |
251 # process all toplevel divs | |
252 alldivs = dom.findall(".//div") | |
253 pagediv = None | |
254 for div in alldivs: | |
255 dc = div.get('class') | |
256 | |
257 # page content div | |
258 if dc == 'pageContent': | |
259 pagediv = div | |
260 | |
261 # pageNumberOrig | |
262 elif dc == 'pageNumberOrig': | |
263 pageinfo['pageNumberOrig'] = div.text | |
264 | |
265 # pageNumberOrigNorm | |
266 elif dc == 'pageNumberOrigNorm': | |
267 pageinfo['pageNumberOrigNorm'] = div.text | |
268 | |
269 # pageNumberOrigNorm | |
270 elif dc == 'countFigureEntries': | |
271 docinfo['countFigureEntries'] = intOr0(div.text) | |
272 | |
273 # pageNumberOrigNorm | |
274 elif dc == 'countTocEntries': | |
275 # WTF: s1 = int(s)/30+1 | |
276 docinfo['countTocEntries'] = intOr0(div.text) | |
277 | |
278 # numTextPages | |
279 elif dc == 'countPages': | |
280 np = intOr0(div.text) | |
281 if np > 0: | |
282 docinfo['numTextPages'] = np | |
283 if docinfo.get('numPages', 0) == 0: | |
284 # seems to be text-only | |
285 docinfo['numTextPages'] = np | |
286 pageinfo['end'] = min(pageinfo['end'], np) | |
287 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) | |
288 if np % pageinfo['groupsize'] > 0: | |
289 pageinfo['numgroups'] += 1 | |
290 | |
291 return | |
292 | |
388 | 293 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
294 def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
295 """returns single page from fulltext""" |
455 | 296 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) |
297 # check for cached text -- but this shouldn't be called twice | |
298 if pageinfo.has_key('textPage'): | |
299 logging.debug("getTextPage: using cached text") | |
300 return pageinfo['textPage'] | |
301 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
302 docpath = docinfo['textURLPath'] |
455 | 303 # just checking |
304 if pageinfo['current'] != pn: | |
305 logging.warning("getTextPage: current!=pn!") | |
306 | |
307 # stuff for constructing full urls | |
308 url = docinfo['url'] | |
309 urlmode = docinfo['mode'] | |
310 sn = pageinfo.get('sn', None) | |
311 highlightQuery = pageinfo.get('highlightQuery', None) | |
312 tocMode = pageinfo.get('tocMode', None) | |
313 tocPN = pageinfo.get('tocPN',None) | |
314 characterNormalization = pageinfo.get('characterNormalization', None) | |
315 selfurl = docinfo['viewerUrl'] | |
316 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
317 if mode == "text_dict": |
455 | 318 # text_dict is called textPollux in the backend |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
319 textmode = "textPollux" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
320 else: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
321 textmode = mode |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
322 |
373 | 323 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) |
455 | 324 if highlightQuery: |
376 | 325 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
326 |
455 | 327 # fetch the page |
185 | 328 pagexml = self.getServerData("page-fragment.xql",textParam) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
329 dom = ET.fromstring(pagexml) |
455 | 330 # extract additional info |
331 self.processPageInfo(dom, docinfo, pageinfo) | |
332 # page content is in <div class="pageContent"> | |
333 pagediv = None | |
334 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent'] | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
335 alldivs = dom.findall(".//div") |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
336 for div in alldivs: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
337 dc = div.get('class') |
455 | 338 # page content div |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
339 if dc == 'pageContent': |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
340 pagediv = div |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
341 break |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
342 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
343 # plain text mode |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
344 if mode == "text": |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
345 if pagediv: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
346 links = pagediv.findall(".//a") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
347 for l in links: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
348 href = l.get('href') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
349 if href and href.startswith('#note-'): |
455 | 350 href = href.replace('#note-',"?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn)) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
351 l.set('href', href) |
455 | 352 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
353 return serialize(pagediv) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
354 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
355 # text-with-links mode |
455 | 356 elif mode == "text_dict": |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
357 if pagediv: |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
358 # check all a-tags |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
359 links = pagediv.findall(".//a") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
360 for l in links: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
361 href = l.get('href') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
362 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
363 if href: |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
364 # is link with href |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
365 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
366 # is pollux link |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
367 selfurl = self.absolute_url() |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
368 # change href |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
369 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
370 # add target |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
371 l.set('target', '_blank') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
372 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
373 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
374 selfurl = self.absolute_url() |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
375 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
376 l.set('target', '_blank') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
377 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
378 l.set('ondblclick', 'popupWin.focus();') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
379 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
380 if href.startswith('#note-'): |
455 | 381 l.set('href', href.replace('#note-',"?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn))) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
382 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
383 return serialize(pagediv) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
384 |
455 | 385 # xml mode |
386 elif mode == "xml": | |
387 if pagediv: | |
388 return serialize(pagediv) | |
389 | |
390 # pureXml mode | |
391 elif mode == "pureXml": | |
392 if pagediv: | |
393 return serialize(pagediv) | |
394 | |
395 # gis mode | |
396 elif mode == "gis": | |
397 name = docinfo['name'] | |
398 if pagediv: | |
399 # check all a-tags | |
400 links = pagediv.findall(".//a") | |
401 for l in links: | |
402 href = l.get('href') | |
403 if href: | |
404 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): | |
405 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) | |
406 l.set('target', '_blank') | |
407 | |
408 return serialize(pagediv) | |
409 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
410 return "no text here" |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
411 |
455 | 412 # WTF: is this needed? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
413 def getOrigPages(self, docinfo=None, pageinfo=None): |
455 | 414 logging.debug("CALLED: getOrigPages!") |
415 if not pageinfo.has_key('pageNumberOrig'): | |
416 logging.warning("getOrigPages: not in pageinfo!") | |
417 return None | |
418 | |
419 return pageinfo['pageNumberOrig'] | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
420 |
455 | 421 # WTF: is this needed? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
422 def getOrigPagesNorm(self, docinfo=None, pageinfo=None): |
455 | 423 logging.debug("CALLED: getOrigPagesNorm!") |
424 if not pageinfo.has_key('pageNumberOrigNorm'): | |
425 logging.warning("getOrigPagesNorm: not in pageinfo!") | |
426 return None | |
427 | |
428 return pageinfo['pageNumberOrigNorm'] | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
429 |
455 | 430 # TODO: should be getWordInfo |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
431 def getTranslate(self, word=None, language=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
432 """translate into another languages""" |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
433 data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
434 return data |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
435 |
455 | 436 # WTF: what does this do? |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
437 def getLemma(self, lemma=None, language=None): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
438 """simular words lemma """ |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
439 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html") |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
440 return data |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
441 |
455 | 442 # WTF: what does this do? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
443 def getLemmaQuery(self, query=None, language=None): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
444 """simular words lemma """ |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
445 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
446 return data |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
447 |
455 | 448 # WTF: what does this do? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
449 def getLex(self, query=None, language=None): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
450 #simular words lemma |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
451 data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
452 return data |
455 | 453 |
454 # WTF: what does this do? | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
455 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
456 #number of |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
457 docpath = docinfo['textURLPath'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
458 pagesize = pageinfo['queryPageSize'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
459 pn = pageinfo['searchPN'] |
181 | 460 query =pageinfo['query'] |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
461 queryType =pageinfo['queryType'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
462 tocSearch = 0 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
463 tocDiv = None |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
464 |
179 | 465 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
466 pagedom = Parse(pagexml) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
467 numdivs = pagedom.xpath("//div[@class='queryResultHits']") |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
468 tocSearch = int(getTextFromNode(numdivs[0])) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
469 tc=int((tocSearch/10)+1) |
402 | 470 return tc |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
471 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
472 def getToc(self, mode="text", docinfo=None): |
455 | 473 """loads table of contents and stores XML in docinfo""" |
474 logging.debug("getToc mode=%s"%mode) | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
475 if mode == "none": |
455 | 476 return docinfo |
477 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
478 if 'tocSize_%s'%mode in docinfo: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
479 # cached toc |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
480 return docinfo |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
481 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
482 docpath = docinfo['textURLPath'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
483 # we need to set a result set size |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
484 pagesize = 1000 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
485 pn = 1 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
486 if mode == "text": |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
487 queryType = "toc" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
488 else: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
489 queryType = mode |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
490 # number of entries in toc |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
491 tocSize = 0 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
492 tocDiv = None |
455 | 493 # fetch full toc |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
494 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) |
455 | 495 dom = ET.fromstring(pagexml) |
496 # page content is in <div class="queryResultPage"> | |
497 pagediv = None | |
498 # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'] | |
499 alldivs = dom.findall("div") | |
500 for div in alldivs: | |
501 dc = div.get('class') | |
502 # page content div | |
503 if dc == 'queryResultPage': | |
504 pagediv = div | |
505 | |
506 elif dc == 'queryResultHits': | |
507 docinfo['tocSize_%s'%mode] = intOr0(div.text) | |
508 | |
509 if pagediv: | |
510 # # split xml in chunks | |
511 # tocs = [] | |
512 # tocdivs = pagediv.findall('div') | |
513 # for p in zip(tocdivs[::2], tocdivs[1::2]): | |
514 # toc = serialize(p[0]) | |
515 # toc += serialize(p[1]) | |
516 # tocs.append(toc) | |
517 # logging.debug("pair: %s"%(toc)) | |
518 # store XML in docinfo | |
519 docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8') | |
520 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
521 return docinfo |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
522 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
523 def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
524 """returns single page from the table of contents""" |
455 | 525 logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
526 if mode == "text": |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
527 queryType = "toc" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
528 else: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
529 queryType = mode |
455 | 530 |
531 # check for cached TOC | |
532 if not docinfo.has_key('tocXML_%s'%mode): | |
533 self.getToc(mode=mode, docinfo=docinfo) | |
534 | |
535 tocxml = docinfo.get('tocXML_%s'%mode, None) | |
536 if not tocxml: | |
537 logging.error("getTocPage: unable to find tocXML") | |
538 return "No ToC" | |
539 | |
540 pagesize = int(pageinfo['tocPageSize']) | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
541 url = docinfo['url'] |
455 | 542 urlmode = docinfo['mode'] |
543 selfurl = docinfo['viewerUrl'] | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
544 viewMode= pageinfo['viewMode'] |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
545 tocMode = pageinfo['tocMode'] |
455 | 546 tocPN = int(pageinfo['tocPN']) |
456 | 547 pn = tocPN |
455 | 548 |
549 fulltoc = ET.fromstring(tocxml) | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
550 |
455 | 551 if fulltoc: |
552 # paginate | |
456 | 553 start = (pn - 1) * pagesize * 2 |
554 len = pagesize * 2 | |
555 del fulltoc[:start] | |
556 del fulltoc[len:] | |
455 | 557 tocdivs = fulltoc |
558 | |
559 # check all a-tags | |
560 links = tocdivs.findall(".//a") | |
561 for l in links: | |
562 href = l.get('href') | |
563 if href: | |
564 # take pn from href | |
565 m = re.match(r'page-fragment\.xql.*pn=(\d+)', href) | |
566 if m is not None: | |
567 # and create new url | |
568 l.set('href', '%s?mode=%s&url=%s&viewMode=%s&pn=%s&tocMode=%s&tocPN=%s'%(selfurl, urlmode, url, viewMode, m.group(1), tocMode, tocPN)) | |
569 else: | |
570 logging.warning("getTocPage: Problem with link=%s"%href) | |
571 | |
572 return serialize(tocdivs) | |
573 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
574 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
575 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
576 """change settings""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
577 self.title=title |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
578 self.timeout = timeout |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
579 self.serverUrl = serverUrl |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
580 if RESPONSE is not None: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
581 RESPONSE.redirect('manage_main') |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
582 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
583 # management methods |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
584 def manage_addMpdlXmlTextServerForm(self): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
585 """Form for adding""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
586 pt = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals()).__of__(self) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
587 return pt() |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
588 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
589 def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
590 #def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
591 """add zogiimage""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
592 newObj = MpdlXmlTextServer(id,title,serverUrl,timeout) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
593 self.Destination()._setObject(id, newObj) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
594 if RESPONSE is not None: |
455 | 595 RESPONSE.redirect('manage_main') |
596 | |
597 |