Mercurial > hg > documentViewer
annotate MpdlXmlTextServer.py @ 469:15394486ab75 elementtree
working with new templates
author | casties |
---|---|
date | Wed, 03 Aug 2011 21:04:18 +0200 |
parents | 224aad394350 |
children | 415a7026eeda |
rev | line source |
---|---|
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
1 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
2 from OFS.SimpleItem import SimpleItem |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
455 | 4 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
5 from Ft.Xml import EMPTY_NAMESPACE, Parse |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
6 from Ft.Xml.Domlette import NonvalidatingReader |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
7 import Ft.Xml.Domlette |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
8 import cStringIO |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
9 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
10 import xml.etree.ElementTree as ET |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
11 |
455 | 12 import re |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
13 import logging |
134 | 14 import urllib |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
15 |
458 | 16 from SrvTxtUtils import getInt, getText, getHttpData |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
17 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def serialize(node):
    """Return node serialized by ElementTree as a UTF-8 encoded XML snippet.

    An XML declaration at the start of the output, if present, is removed
    together with the whitespace that follows it.
    """
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # skip the closing '?>' and any whitespace behind it instead of
            # assuming that exactly one character follows the declaration
            return s[end + 2:].lstrip()

    return s
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
27 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
28 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getTextFromNode(node):
    """Return the concatenated data of the text nodes directly below node.

    Only direct children of type TEXT_NODE contribute; text inside child
    elements is not included. Returns "" if node is None.
    """
    if node is None:
        return ""

    # DOM (4Suite) style access: collect the data of all direct text children
    fragments = [child.data
                 for child in node.childNodes
                 if child.nodeType == node.TEXT_NODE]
    return "".join(fragments)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
48 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def serializeNode(node, encoding="utf-8"):
    """Return a string containing node printed as XML by 4Suite Domlette.

    node     -- the node handed to Ft.Xml.Domlette.Print
    encoding -- output encoding passed on to Print (default "utf-8")
    """
    # 4Suite:
    stream = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
        return stream.getvalue()
    finally:
        # release the buffer even if printing fails
        stream.close()
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
60 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
61 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
62 class MpdlXmlTextServer(SimpleItem): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
63 """TextServer implementation for MPDL-XML eXist server""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
64 meta_type="MPDL-XML TextServer" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
65 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
66 manage_options=( |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
67 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'}, |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
68 )+SimpleItem.manage_options |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
69 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
70 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
71 |
def __init__(self, id, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Set up id, title, timeout and the base URL of the text server.

    If serverName is given, the base URL is built from it as
    "http://<serverName>/mpdl/interface/" and serverUrl is ignored;
    otherwise serverUrl is used as is.
    """
    self.id = id
    self.title = title
    self.timeout = timeout
    # an explicit server name takes precedence over the full URL
    if serverName is not None:
        serverUrl = "http://%s/mpdl/interface/" % serverName
    self.serverUrl = serverUrl
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
81 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getHttpData(self, url, data=None):
    """Return the result of the HTTP request for url with optional data.

    Delegates to the module-level getHttpData imported from SrvTxtUtils,
    passing this object's timeout.
    """
    timeout = self.timeout
    return getHttpData(url, data, timeout=timeout)
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
85 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getServerData(self, method, data=None):
    """Return the text server's answer for method with optional data.

    The request URL is this object's serverUrl with method appended; the
    request itself is made by the module-level getHttpData with this
    object's timeout.
    """
    return getHttpData(self.serverUrl + method, data, timeout=self.timeout)
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
90 |
455 | 91 # WTF: what does this really do? can it be integrated in getPage? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getSearch(self, pageinfo=None, docinfo=None):
    """Query the text server and return the result list as an XML string.

    Sends the query described by pageinfo to "doc-query.xql" for the
    document docinfo['textURLPath'], rewrites the links inside the result
    div of the answer so that they lead back to this object, and returns
    the serialized result div.

    pageinfo -- must contain 'queryPageSize', 'sn', 'highlightQuery',
                'query', 'queryType', 'viewMode', 'tocMode',
                'characterNormalization' and 'tocPN'; 'searchPN' is
                optional and defaults to 1
    docinfo  -- must contain 'textURLPath' and 'url'

    Returns the string "no text here" if queryType is not one of the
    handled types or the answer contains no matching result div.
    """
    # NOTE(review): the nesting of this method was reconstructed from a
    # listing without indentation -- confirm against the repository.
    logging.debug("getSearch()")
    docpath = docinfo['textURLPath']
    url = docinfo['url']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo.get('searchPN',1)
    sn = pageinfo['sn']
    highlightQuery = pageinfo['highlightQuery']
    query = pageinfo['query']
    queryType = pageinfo['queryType']
    viewMode = pageinfo['viewMode']
    tocMode = pageinfo['tocMode']
    characterNormalization = pageinfo['characterNormalization']
    tocPN = pageinfo['tocPN']
    selfurl = self.absolute_url()
    data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
    # replace the '?document=<docpath>' parameter in the answer by '?url=<url>'
    pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
    pagedom = Parse(pagexml)

    isFulltext = queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma")
    isMorph = (queryType == "fulltextMorph")
    isIndex = queryType in ("ftIndex", "ftIndexMorph")

    # pick the div holding the results for this kind of query
    if isMorph:
        pagedivs = pagedom.xpath("//div[@class='queryResult']")
    elif isFulltext or isIndex:
        pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
    else:
        pagedivs = []

    if len(pagedivs) == 0:
        return "no text here"

    # query parameters that replace 'mode=text' in the links
    if isIndex:
        linkParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)
    else:
        linkParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)

    pagenode = pagedivs[0]
    # NOTE(review): "//a" is an absolute XPath, so this presumably visits
    # every link of the answer, not only those below pagenode -- confirm.
    for l in pagenode.xpath("//a"):
        hrefNode = l.getAttributeNodeNS(None, u"href")
        if not hrefNode:
            continue
        href = hrefNode.nodeValue

        if isIndex:
            hrefNode.nodeValue = href.replace('mode=text', linkParams)
            # dictionary links are based on the unmodified href and so
            # overwrite the value set above
            if href.startswith('../lt/lex.xql'):
                hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl)
                self._setSearchPopupAttributes(l)
            if href.startswith('../lt/lemma.xql'):
                hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl))
                self._setSearchPopupAttributes(l)
            continue

        # full text queries: page links lead back to this object
        if href.startswith('page-fragment.xql'):
            pagexml = href.replace('mode=text', linkParams)
            hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
        if isMorph and href.startswith('../lt/lemma.xql'):
            hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl))
            self._setSearchPopupAttributes(l)

    return serializeNode(pagenode)

def _setSearchPopupAttributes(self, link):
    """Make the link element open its target in a popup window."""
    link.setAttributeNS(None, 'target', '_blank')
    link.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
    link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
183 |
def getGisPlaces(self, docinfo=None, pageinfo=None):
    """Return the ids of the GIS places on the current page.

    Asks "xpath.xql" on the text server for the '//place' elements on page
    pageinfo['current'] of the document docinfo['textURLPath'] and returns
    the "id" attributes of the place elements in the answer as one
    comma-separated string (empty if there are none).

    Returns None if docinfo has no 'textURLPath'.
    """
    xpath = '//place'
    docpath = docinfo.get('textURLPath', None)
    if not docpath:
        return None

    pn = pageinfo['current']
    text = self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docpath, xpath, pn))
    dom = ET.fromstring(text)
    places = dom.findall(".//result/resultPage/place")
    # join once after collecting all ids instead of on every iteration
    return ",".join([place.get("id") for place in places])
206 | |
def getAllGisPlaces (self, docinfo=None, pageinfo=None):
    """Return the ids of the GIS places in the whole document.

    Asks "xpath.xql" on the text server for the '//echo:place' elements of
    the document docinfo['textURLPath'] and returns the "id" attributes of
    the place elements in the answer as one comma-separated string (empty
    if there are none). pageinfo is not used.

    Returns None if docinfo has no 'textURLPath', like getGisPlaces.
    """
    xpath = '//echo:place'
    docpath = docinfo.get('textURLPath', None)
    if not docpath:
        return None

    text = self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docpath, xpath))
    dom = ET.fromstring(text)
    # NOTE(review): the query asks for echo:place but the answer is searched
    # for un-namespaced place elements -- confirm the server's answer format.
    places = dom.findall(".//result/resultPage/place")
    # join once after collecting all ids instead of on every iteration
    return ",".join([place.get("id") for place in places])
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
227 |
def processPageInfo(self, dom, docinfo, pageinfo):
    """Copy the page info divs from dom into docinfo and pageinfo.

    The first <div> child of dom is taken to be the page meta container.
    Each of its children is handled according to its "class" attribute:

    pageNumberOrig     -> pageinfo['pageNumberOrig'] (text)
    pageNumberOrigNorm -> pageinfo['pageNumberOrigNorm'] (text)
    countFigureEntries -> docinfo['numFigureEntries'] (via getInt)
    countTocEntries    -> docinfo['numTocEntries'] (via getInt)
    pageHeaderTitle    -> docinfo['pageHeaderTitle'] (text)
    countPages         -> docinfo['numTextPages'] if greater than 0; if
                          docinfo has no non-zero 'numPages' yet, also
                          docinfo['numPages'], pageinfo['end'] and
                          pageinfo['numgroups']

    Does nothing if dom has no <div> child. Returns None.
    """
    # assume first second level div is pageMeta
    metadiv = dom.find("div")
    if metadiv is None:
        # find() found no div: there is no page meta information to process
        return

    for div in metadiv:
        dc = div.get('class')

        if dc == 'pageNumberOrig':
            pageinfo['pageNumberOrig'] = div.text

        elif dc == 'pageNumberOrigNorm':
            pageinfo['pageNumberOrigNorm'] = div.text

        elif dc == 'countFigureEntries':
            docinfo['numFigureEntries'] = getInt(div.text)

        elif dc == 'countTocEntries':
            docinfo['numTocEntries'] = getInt(div.text)

        elif dc == 'pageHeaderTitle':
            docinfo['pageHeaderTitle'] = div.text

        elif dc == 'countPages':
            np = getInt(div.text)
            if np > 0:
                docinfo['numTextPages'] = np
                # NOTE(review): nesting reconstructed from a listing without
                # indentation -- confirm that 'end' and 'numgroups' are only
                # updated in the text-only case.
                if docinfo.get('numPages', 0) == 0:
                    # seems to be text-only - update page count
                    docinfo['numPages'] = np
                    pageinfo['end'] = min(pageinfo['end'], np)
                    # number of page groups, rounded up
                    pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
                    if np % pageinfo['groupsize'] > 0:
                        pageinfo['numgroups'] += 1

    return
270 | |
388 | 271 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
272 def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
273 """returns single page from fulltext""" |
455 | 274 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) |
275 # check for cached text -- but this shouldn't be called twice | |
276 if pageinfo.has_key('textPage'): | |
277 logging.debug("getTextPage: using cached text") | |
278 return pageinfo['textPage'] | |
279 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
280 docpath = docinfo['textURLPath'] |
455 | 281 # just checking |
282 if pageinfo['current'] != pn: | |
283 logging.warning("getTextPage: current!=pn!") | |
284 | |
285 # stuff for constructing full urls | |
286 url = docinfo['url'] | |
287 urlmode = docinfo['mode'] | |
288 sn = pageinfo.get('sn', None) | |
289 highlightQuery = pageinfo.get('highlightQuery', None) | |
290 tocMode = pageinfo.get('tocMode', None) | |
291 tocPN = pageinfo.get('tocPN',None) | |
292 characterNormalization = pageinfo.get('characterNormalization', None) | |
293 selfurl = docinfo['viewerUrl'] | |
294 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
295 if mode == "text_dict": |
455 | 296 # text_dict is called textPollux in the backend |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
297 textmode = "textPollux" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
298 else: |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
299 textmode = mode |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
300 |
373 | 301 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) |
455 | 302 if highlightQuery: |
376 | 303 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
304 |
455 | 305 # fetch the page |
185 | 306 pagexml = self.getServerData("page-fragment.xql",textParam) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
307 dom = ET.fromstring(pagexml) |
455 | 308 # extract additional info |
309 self.processPageInfo(dom, docinfo, pageinfo) | |
310 # page content is in <div class="pageContent"> | |
311 pagediv = None | |
312 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent'] | |
469 | 313 # so we look at the second level divs |
314 alldivs = dom.findall("div") | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
315 for div in alldivs: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
316 dc = div.get('class') |
455 | 317 # page content div |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
318 if dc == 'pageContent': |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
319 pagediv = div |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
320 break |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
321 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
322 # plain text mode |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
323 if mode == "text": |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
324 if pagediv: |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
325 links = pagediv.findall(".//a") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
326 for l in links: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
327 href = l.get('href') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
328 if href and href.startswith('#note-'): |
455 | 329 href = href.replace('#note-',"?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn)) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
330 l.set('href', href) |
455 | 331 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
332 return serialize(pagediv) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
333 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
334 # text-with-links mode |
455 | 335 elif mode == "text_dict": |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
336 if pagediv: |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
337 # check all a-tags |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
338 links = pagediv.findall(".//a") |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
339 for l in links: |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
340 href = l.get('href') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
341 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
342 if href: |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
343 # is link with href |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
344 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
345 # is pollux link |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
346 selfurl = self.absolute_url() |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
347 # change href |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
348 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl)) |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
349 # add target |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
350 l.set('target', '_blank') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
351 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
352 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
353 selfurl = self.absolute_url() |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
354 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
355 l.set('target', '_blank') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
356 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
357 l.set('ondblclick', 'popupWin.focus();') |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
358 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
359 if href.startswith('#note-'): |
455 | 360 l.set('href', href.replace('#note-',"?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn))) |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
361 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
362 return serialize(pagediv) |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
363 |
455 | 364 # xml mode |
365 elif mode == "xml": | |
366 if pagediv: | |
367 return serialize(pagediv) | |
368 | |
369 # pureXml mode | |
370 elif mode == "pureXml": | |
371 if pagediv: | |
372 return serialize(pagediv) | |
373 | |
374 # gis mode | |
375 elif mode == "gis": | |
376 name = docinfo['name'] | |
377 if pagediv: | |
378 # check all a-tags | |
379 links = pagediv.findall(".//a") | |
380 for l in links: | |
381 href = l.get('href') | |
382 if href: | |
383 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): | |
384 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) | |
385 l.set('target', '_blank') | |
386 | |
387 return serialize(pagediv) | |
388 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
389 return "no text here" |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
390 |
455 | 391 # WTF: is this needed? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getOrigPages(self, docinfo=None, pageinfo=None):
    # Returns pageinfo['pageNumberOrig'], or None (after logging a warning)
    # when pageinfo is None or lacks that key.  docinfo is accepted but unused.
    # NOTE(review): kept as comments rather than a docstring because the
    # original had none and, presumably, Zope only publishes methods that
    # carry a docstring -- TODO confirm before adding one.
    logging.debug("CALLED: getOrigPages!")
    # 'in' replaces the Python-2-only dict.has_key() (getToc already tests
    # membership this way); the None check covers the declared default.
    if pageinfo is None or 'pageNumberOrig' not in pageinfo:
        logging.warning("getOrigPages: not in pageinfo!")
        return None

    return pageinfo['pageNumberOrig']
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
399 |
455 | 400 # WTF: is this needed? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
    # Returns pageinfo['pageNumberOrigNorm'], or None (after logging a
    # warning) when pageinfo is None or lacks that key.  docinfo is accepted
    # but unused.
    # NOTE(review): kept as comments rather than a docstring because the
    # original had none and, presumably, Zope only publishes methods that
    # carry a docstring -- TODO confirm before adding one.
    logging.debug("CALLED: getOrigPagesNorm!")
    # 'in' replaces the Python-2-only dict.has_key() (getToc already tests
    # membership this way); the None check covers the declared default.
    if pageinfo is None or 'pageNumberOrigNorm' not in pageinfo:
        logging.warning("getOrigPagesNorm: not in pageinfo!")
        return None

    return pageinfo['pageNumberOrigNorm']
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
408 |
455 | 409 # TODO: should be getWordInfo |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getTranslate(self, word=None, language=None):
    """Query the backend script lt/wordInfo.xql for a word.

    language is passed through str(), word is URL-quoted, and HTML output
    is requested.  Returns whatever self.getServerData returns.
    """
    languageParam = str(language)
    wordParam = urllib.quote(word)
    queryString = "language=" + languageParam + "&word=" + wordParam + "&output=html"
    return self.getServerData("lt/wordInfo.xql", queryString)
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
414 |
455 | 415 # WTF: what does this do? |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getLemma(self, lemma=None, language=None):
    """Query the backend script lt/lemma.xql with a lemma.

    language is passed through str(), lemma is URL-quoted and sent as the
    'lemma' parameter, and HTML output is requested.  Returns whatever
    self.getServerData returns.
    """
    languageParam = str(language)
    lemmaParam = urllib.quote(lemma)
    queryString = "language=" + languageParam + "&lemma=" + lemmaParam + "&output=html"
    return self.getServerData("lt/lemma.xql", queryString)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
420 |
455 | 421 # WTF: what does this do? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getLemmaQuery(self, query=None, language=None):
    """Query the backend script lt/lemma.xql with a free query.

    Same backend script as getLemma, but the URL-quoted text is sent as the
    'query' parameter.  language is passed through str() and HTML output is
    requested.  Returns whatever self.getServerData returns.
    """
    languageParam = str(language)
    queryParam = urllib.quote(query)
    queryString = "language=" + languageParam + "&query=" + queryParam + "&output=html"
    return self.getServerData("lt/lemma.xql", queryString)
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
426 |
455 | 427 # WTF: what does this do? |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getLex(self, query=None, language=None):
    # Queries the backend script lt/lex.xql with an empty 'document'
    # parameter, str(language) and the URL-quoted query, and returns
    # whatever self.getServerData returns.
    # NOTE(review): kept as comments rather than a docstring because the
    # original had none and, presumably, Zope only publishes methods that
    # carry a docstring -- TODO confirm before adding one.
    languageParam = str(language)
    queryParam = urllib.quote(query)
    queryString = "document=&language=" + languageParam + "&query=" + queryParam
    return self.getServerData("lt/lex.xql", queryString)
455 | 432 |
433 # WTF: what does this do? | |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
    # Runs doc-query.xql in 'text' mode for the document in docinfo and
    # returns int((hits / 10) + 1), where hits is the integer text of the
    # first <div class="queryResultHits"> in the response.
    # NOTE(review): the divisor 10 is hard-coded rather than taken from
    # pageinfo['queryPageSize'], and an exact multiple of 10 hits yields one
    # more than hits/10 -- confirm this is intended.
    # NOTE(review): kept as comments rather than a docstring because the
    # original had none and, presumably, Zope only publishes methods that
    # carry a docstring -- TODO confirm before adding one.
    textPath = docinfo['textURLPath']
    resultPageSize = pageinfo['queryPageSize']
    # the pn, query and queryType arguments are overridden by pageinfo
    pn = pageinfo['searchPN']
    query = pageinfo['query']
    queryType = pageinfo['queryType']

    queryParams = "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(textPath, 'text', queryType, urllib.quote(query), resultPageSize, pn)
    resultXml = self.getServerData("doc-query.xql", queryParams)
    hitNodes = Parse(resultXml).xpath("//div[@class='queryResultHits']")
    hitCount = int(getTextFromNode(hitNodes[0]))
    return int((hitCount/10)+1)
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
450 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getToc(self, mode="text", docinfo=None):
    """Load the table of contents for mode and store it in docinfo.

    Returns docinfo unchanged when mode is "none" or when
    docinfo['tocSize_<mode>'] is already present (cached).  Otherwise
    doc-query.xql is asked for the first 1000 results (queryType "toc" for
    mode "text", else the mode itself) and, depending on what the response
    contains, docinfo['tocSize_<mode>'] and docinfo['tocXML_<mode>'] are set.
    docinfo is always the return value.
    """
    logging.debug("getToc mode=%s"%mode)
    if mode == "none":
        return docinfo

    sizeKey = 'tocSize_%s'%mode
    if sizeKey in docinfo:
        # cached toc
        return docinfo

    textPath = docinfo['textURLPath']
    queryType = "toc" if mode == "text" else mode
    # a result set size has to be given: 1000 entries, first result page
    queryParams = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(textPath, queryType, 1000, 1)
    # fetch full toc
    root = ET.fromstring(self.getServerData("doc-query.xql", queryParams))

    # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'],
    # so the direct child divs are inspected one by one
    resultDiv = None
    for child in root.findall("div"):
        cssClass = child.get('class')
        if cssClass == 'queryResultPage':
            # page content div (the last match wins, there is no break)
            resultDiv = child
        elif cssClass == 'queryResultHits':
            docinfo[sizeKey] = getInt(child.text)

    # same condition as truth-testing the Element: found and has children
    if resultDiv is not None and len(resultDiv) > 0:
        # store XML in docinfo
        docinfo['tocXML_%s'%mode] = ET.tostring(resultDiv, 'UTF-8')

    return docinfo
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
493 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
    """Return one page of the table of contents as serialized XML.

    The ToC XML is read from docinfo['tocXML_<mode>']; when that key is
    missing, self.getToc is called first to fill it.  The pn argument is
    only logged: the page that is cut out is pageinfo['tocPN'], with
    pageinfo['tocPageSize'] giving the page size.  Links whose href matches
    page-fragment.xql...pn=<digits> are rewritten to point at
    docinfo['viewerUrl'].

    Returns the string "No ToC" when no ToC XML is available, None when the
    ToC root element has no children, and otherwise the result of
    serialize() on the trimmed ToC element.
    """
    logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))

    tocKey = 'tocXML_%s'%mode
    # check for cached TOC ('in' replaces the Python-2-only has_key)
    if tocKey not in docinfo:
        self.getToc(mode=mode, docinfo=docinfo)

    tocxml = docinfo.get(tocKey, None)
    if not tocxml:
        logging.error("getTocPage: unable to find tocXML")
        return "No ToC"

    pagesize = int(pageinfo['tocPageSize'])
    url = docinfo['url']
    urlmode = docinfo['mode']
    selfurl = docinfo['viewerUrl']
    viewMode = pageinfo['viewMode']
    tocMode = pageinfo['tocMode']
    tocPN = int(pageinfo['tocPN'])

    fulltoc = ET.fromstring(tocxml)
    # explicit child count instead of truth-testing the Element
    if len(fulltoc) == 0:
        return None

    # paginate: keep child elements [start, start + count) of the ToC root
    # (presumably each ToC entry spans two child elements, hence the
    # factor 2 -- TODO confirm against the backend output)
    start = (tocPN - 1) * pagesize * 2
    count = pagesize * 2
    del fulltoc[:start]
    del fulltoc[count:]

    # check all a-tags
    for link in fulltoc.findall(".//a"):
        href = link.get('href')
        if not href:
            continue

        # take pn from href
        m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
        if m is not None:
            # and create new url
            link.set('href', '%s?mode=%s&url=%s&viewMode=%s&pn=%s&tocMode=%s&tocPN=%s'%(selfurl, urlmode, url, viewMode, m.group(1), tocMode, tocPN))
        else:
            logging.warning("getTocPage: Problem with link=%s"%href)

    return serialize(fulltoc)
544 | |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
545 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Store title, serverUrl and timeout on this object.

    When RESPONSE is given, it is redirected to 'manage_main' afterwards.
    """
    self.serverUrl = serverUrl
    self.timeout = timeout
    self.title = title
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
553 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
554 # management methods |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def manage_addMpdlXmlTextServerForm(self):
    """Render the add form from the page template zpt/manage_addMpdlXmlTextServer."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # bind the template to self via __of__ before calling it
    return template.__of__(self)()
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
559 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create a MpdlXmlTextServer and add it to self.Destination() under id.

    When RESPONSE is given, it is redirected to 'manage_main' afterwards.
    """
    # NOTE: an alternative default for serverUrl,
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/,
    # was left commented out next to this signature.
    server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    container = self.Destination()
    container._setObject(id, server)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
567 | |
568 |