Mercurial > hg > documentViewer
annotate MpdlXmlTextServer.py @ 453:beb7ccb92564 elementtree
first version using elementtree instead of 4suite xml
author | casties |
---|---|
date | Thu, 14 Jul 2011 19:43:56 +0200 |
parents | 1cea48640992 |
children | 0a53fea83df7 |
rev | line source |
---|---|
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
1 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
2 from OFS.SimpleItem import SimpleItem |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
4 from Ft.Xml import EMPTY_NAMESPACE, Parse |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
5 from Ft.Xml.Domlette import NonvalidatingReader |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
6 import Ft.Xml.Domlette |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
7 import cStringIO |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
8 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
9 import xml.etree.ElementTree as ET |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
10 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
11 import md5 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
12 import sys |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
13 import logging |
134 | 14 import urllib |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
15 import documentViewer |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
16 #from documentViewer import getTextFromNode, serializeNode |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
17 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getText(node):
    """Return the concatenated text content of an ElementTree node.

    The result is node.text followed, for every child in document order,
    by the child's own (recursively collected) text and then the child's
    tail text.

    node -- an xml.etree.ElementTree element, or None.

    Returns "" when node is None.
    """
    if node is None:
        return ""
    # ET: collect the pieces and join once instead of growing a string.
    parts = [node.text or ""]
    for e in node:
        # The recursion must use this function's exact name; the earlier
        # lower-case spelling "gettext" was undefined and raised NameError
        # for every node that had child elements.
        parts.append(getText(e))
        if e.tail:
            parts.append(e.tail)

    return "".join(parts)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
30 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def serialize(node):
    """Return an XML snippet of node without a leading XML declaration.

    node -- an xml.etree.ElementTree element.

    The value is whatever ET.tostring(node, 'UTF-8') produces (an encoded
    byte string), minus the "<?xml ...?>" declaration if one was emitted.
    """
    s = ET.tostring(node, 'UTF-8')
    # b'' literals compare correctly against the encoded output whether the
    # interpreter calls that type str or bytes.
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # Drop the declaration and whatever whitespace separates it from
            # the root element, instead of assuming exactly one character.
            return s[end + 2:].lstrip()

    return s
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
40 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
41 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getTextFromNode(node):
    """Return the text found in the direct text-node children of a DOM node.

    node -- a DOM node exposing childNodes, nodeType and TEXT_NODE (the
            4Suite API this function was written against), or None.

    Only immediate children of type TEXT_NODE contribute; text nested inside
    child elements is not included. Returns "" when node is None.
    """
    if node is None:
        return ""

    # 4Suite: pick up the data of every direct text child, in order.
    pieces = [child.data
              for child in node.childNodes
              if child.nodeType == node.TEXT_NODE]
    return "".join(pieces)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
61 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def serializeNode(node, encoding="utf-8"):
    """Return node serialized as an XML string.

    node     -- a 4Suite Domlette node.
    encoding -- output encoding handed to Ft.Xml.Domlette.Print.
    """
    # 4Suite: print into an in-memory buffer and hand back its contents.
    buf = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
        return buf.getvalue()
    finally:
        buf.close()
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
73 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
74 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
75 class MpdlXmlTextServer(SimpleItem): |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
76 """TextServer implementation for MPDL-XML eXist server""" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
77 meta_type="MPDL-XML TextServer" |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
78 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
79 manage_options=( |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
80 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'}, |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
81 )+SimpleItem.manage_options |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
82 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
83 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
84 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def __init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Set up the text server object.

    id         -- object id.
    title      -- object title.
    serverUrl  -- base URL of the text server interface; only used when
                  serverName is None.
    serverName -- host name; when given, the base URL is built from it as
                  "http://<serverName>/mpdl/interface/".
    timeout    -- value stored on the instance and passed along with HTTP
                  requests.
    """
    # alternative default serverUrl seen during development:
    # "http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/"
    self.id = id
    self.title = title
    self.timeout = timeout
    # An explicit server name takes precedence over the serverUrl argument.
    if serverName is not None:
        serverUrl = "http://%s/mpdl/interface/"%serverName
    self.serverUrl = serverUrl
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
96 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getHttpData(self, url, data=None):
    """Fetch url (with optional data) and return the result.

    Delegates to documentViewer.getHttpData, passing this object's timeout.
    """
    response = documentViewer.getHttpData(url, data, timeout=self.timeout)
    return response
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
100 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getServerData(self, method, data=None):
    """Call a text-server method and return the result.

    method -- name appended to self.serverUrl to form the request URL.
    data   -- optional request data passed through unchanged.
    """
    return documentViewer.getHttpData(
        self.serverUrl + method,
        data,
        timeout=self.timeout,
    )
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
105 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getSearch(self, pageinfo=None, docinfo=None):
    """get search list

    Sends the query described by pageinfo to the text server's
    "doc-query.xql" method, parses the answer with 4Suite's Parse, rewrites
    the href attributes of result links so that they point back at this
    object (self.absolute_url()), and returns the serialized result div.

    pageinfo -- mapping; the keys 'queryPageSize', 'sn', 'highlightQuery',
                'query', 'queryType', 'viewMode', 'tocMode',
                'characterNormalization' and 'tocPN' are read with [] and
                must be present; 'searchPN' is optional (default 1).
    docinfo  -- mapping; 'textURLPath' and 'url' are read with [].

    Both parameters default to None but are subscripted unconditionally, so
    callers have to supply both.

    Returns the serialized XML of the first matching result div for the
    handled query types, or the string "no text here" when no branch
    produced a result.
    """
    docpath = docinfo['textURLPath']
    url = docinfo['url']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo.get('searchPN',1)
    sn = pageinfo['sn']
    highlightQuery = pageinfo['highlightQuery']
    query =pageinfo['query']
    queryType =pageinfo['queryType']
    viewMode= pageinfo['viewMode']
    tocMode = pageinfo['tocMode']
    characterNormalization = pageinfo['characterNormalization']
    #optionToggle = pageinfo['optionToggle']
    tocPN = pageinfo['tocPN']
    selfurl = self.absolute_url()
    # Only query and highlightQuery are URL-quoted; the other values are
    # interpolated into the request string as they are.
    data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
    # Swap the server-side "?document=<path>" references in the response for
    # "?url=<url>" before parsing.
    pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
    pagedom = Parse(pagexml)

    # The following triple-quoted block is a bare string expression, i.e.
    # disabled code (hit counting) that is never executed.
    """
    pagedivs = pagedom.xpath("//div[@class='queryResultHits']")
    if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")):
        if len(pagedivs)>0:
            docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0]))
            s = getTextFromNode(pagedivs[0])
            s1 = int(s)/10+1
            try:
                docinfo['queryResultHits'] = int(s1)
                logging.debug("SEARCH ENTRIES: %s"%(s1))
            except:
                docinfo['queryResultHits'] = 0
    """
    # Branch 1: result page whose "page-fragment.xql" links are redirected
    # to this object with the current view/query parameters added.
    if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
        pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
        if len(pagedivs)>0:
            pagenode=pagedivs[0]
            # NOTE(review): "//a" is an absolute XPath; presumably it matches
            # every <a> in the document, not only those below pagenode
            # (".//a") -- confirm against 4Suite's xpath semantics.
            links=pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        selfurl = self.absolute_url()
                        pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization))
                        hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
            #logging.debug("PUREXML :%s"%(serializeNode(pagenode)))
            return serializeNode(pagenode)
    # Branch 2: morphological full-text search; additionally turns
    # "../lt/lemma.xql" links into pop-up links to head_main_query.
    if (queryType=="fulltextMorph"):
        pagedivs = pagedom.xpath("//div[@class='queryResult']")
        if len(pagedivs)>0:
            pagenode=pagedivs[0]
            links=pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        selfurl = self.absolute_url()
                        pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization))
                        hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl))
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
            # NOTE(review): the result of this query is assigned but not used
            # before the return below.
            pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']")
            return serializeNode(pagenode)
    # Branch 3: index queries; every link gets the view parameters, lexicon
    # and lemma links become pop-ups.
    if (queryType=="ftIndex")or(queryType=="ftIndexMorph"):
        pagedivs= pagedom.xpath("//div[@class='queryResultPage']")
        if len(pagedivs)>0:
            pagenode=pagedivs[0]
            links=pagenode.xpath("//a")
            for l in links:
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if hrefNode:
                    href = hrefNode.nodeValue
                    hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))
                    # NOTE(review): the two rewrites below start again from the
                    # original href, so for lex/lemma links they replace the
                    # value assigned just above -- confirm this is intended.
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl)
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl))
                        l.setAttributeNS(None, 'target', '_blank')
                        l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
                        l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
            return serializeNode(pagenode)
    # Unknown query type, or no matching result div in the response.
    return "no text here"
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
196 |
def getGisPlaces(self, docinfo=None, pageinfo=None):
    """Return the ids of all GIS places on the current page.

    docinfo  -- mapping; 'textURLPath' names the document on the text server.
    pageinfo -- mapping; 'current' is the page number sent as "pn".

    Asks the text server's "xpath.xql" method for '//place' on that page and
    collects the "id" attribute of every place element found under
    //result/resultPage in the answer.

    Returns the ids joined with "," ("" when there are none), or None when
    docinfo has no usable 'textURLPath'.
    """
    xpath = '//place'
    docpath = docinfo.get('textURLPath', None)
    if not docpath:
        return None

    pn = pageinfo['current']
    text = self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docpath, xpath, pn))
    dom = Parse(text)
    hrefList = []
    for place in dom.xpath("//result/resultPage/place"):
        idNode = place.getAttributeNodeNS(None, u"id")
        # A place element without an id attribute has nothing to report;
        # skip it instead of failing on None.nodeValue.
        if idNode is None:
            continue
        hrefList.append(idNode.nodeValue)

    # Join once after the loop rather than on every iteration.
    #logging.debug("getGisPlaces :%s"%(myList))
    return ",".join(hrefList)
219 | |
def getAllGisPlaces (self, docinfo=None, pageinfo=None):
    """Return the ids of all GIS places of the whole book.

    docinfo  -- mapping; 'textURLPath' names the document on the text server
                (KeyError if missing).
    pageinfo -- accepted for signature compatibility; not used, because the
                request covers the whole document rather than one page.

    Asks the text server's "xpath.xql" method for '//echo:place' and collects
    the "id" attribute of every place element found under
    //result/resultPage in the answer.

    Returns the ids joined with "," ("" when there are none).
    """
    xpath = '//echo:place'
    docpath = docinfo['textURLPath']
    # No page number is sent, so pageinfo is deliberately not read here;
    # reading pageinfo['current'] made the default pageinfo=None fail.
    text = self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docpath, xpath))
    dom = Parse(text)
    hrefList = []
    for place in dom.xpath("//result/resultPage/place"):
        idNode = place.getAttributeNodeNS(None, u"id")
        # Skip place elements that carry no id attribute.
        if idNode is None:
            continue
        hrefList.append(idNode.nodeValue)

    # Join once after the loop rather than on every iteration.
    #logging.debug("getALLGisPlaces :%s"%(myList))
    return ",".join(hrefList)
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
240 |
388 | 241 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
    """returns single page from fulltext

    Requests the page fragment from the text server ("page-fragment.xql"),
    parses it with ElementTree, updates the paging information and returns
    the serialized ``<div class="pageContent">`` after rewriting its links
    according to ``mode``.

    mode     -- "text", "xml", "pureXml", "gis" or "text_dict"; "text_dict"
                is requested from the server as "textPollux". Any other
                value is passed to the server unchanged but yields
                "no text here".
    pn       -- ignored; the page number is always pageinfo['current'].
    docinfo  -- mapping; reads 'textURLPath', optionally 'url' and 'name';
                'numPages' is written when the fragment has a countPages div.
    pageinfo -- mapping; reads 'current', 'sn', 'highlightQuery', 'tocMode',
                'characterNormalization', 'tocPN', 'end' and 'groupsize';
                'end' and 'numgroups' are written together with 'numPages'.

    Returns the serialized page content, or the string "no text here" when
    the fragment has no pageContent div or the mode is not handled.
    """
    docpath = docinfo['textURLPath']
    url = docinfo.get('url', None)
    name = docinfo.get('name', None)
    # the pn argument is overridden by the current page from pageinfo
    pn = pageinfo['current']
    sn = pageinfo['sn']
    highlightQuery = pageinfo['highlightQuery']
    tocMode = pageinfo['tocMode']
    characterNormalization = pageinfo['characterNormalization']
    tocPN = pageinfo['tocPN']
    selfurl = self.absolute_url()

    if mode == "text_dict":
        textmode = "textPollux"
    else:
        textmode = mode

    textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s" % (docpath, textmode, pn, characterNormalization)
    if highlightQuery is not None:
        textParam += "&highlightQuery=%s&sn=%s" % (urllib.quote(highlightQuery), sn)

    pagexml = self.getServerData("page-fragment.xql", textParam)
    dom = ET.fromstring(pagexml)

    # Original page numbers are not extracted here; getOrigPages and
    # getOrigPagesNorm fetch them separately.

    # find the page content and the total page count among all divs
    pagediv = None
    for div in dom.findall(".//div"):
        dc = div.get('class')
        if dc == 'pageContent':
            pagediv = div

        if dc == 'countPages':
            try:
                np = int(div.text)
                docinfo['numPages'] = np
                pageinfo['end'] = min(pageinfo['end'], np)
                pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
                if np % pageinfo['groupsize'] > 0:
                    pageinfo['numgroups'] += 1

            except (TypeError, ValueError, KeyError, ZeroDivisionError):
                # missing/non-numeric count or incomplete pageinfo
                logging.debug("getTextPage: could not evaluate countPages=%r", div.text)
                docinfo['numPages'] = 0

            # NOTE(review): the scan stops at the countPages div, so a
            # pageContent div that follows it is not seen -- presumably the
            # server emits pageContent first; confirm.
            break

    # An Element is false when it has no children, so test identity:
    # a page consisting only of text must still be returned.
    if pagediv is None:
        return "no text here"

    # plain text mode
    if mode == "text":
        for l in pagediv.findall(".//a"):
            href = l.get('href')
            if href and href.startswith('#note-'):
                # make note anchors point back into the viewer page
                href = href.replace('#note-', "?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-" % (url, tocMode, tocPN, pn))
                l.set('href', href)
        logging.debug("page=%s", ET.tostring(pagediv, 'UTF-8'))
        return serialize(pagediv)

    if mode == "xml":
        return serialize(pagediv)

    if mode == "pureXml":
        return serialize(pagediv)

    if mode == "gis":
        # check all a-tags
        for l in pagediv.findall(".//a"):
            href = l.get('href')
            if href:
                if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
                    l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl', 'chinagis/REST/db/mpdl/%s' % name))
                    l.set('target', '_blank')

        # was serialize(pagenode): an undefined name that raised NameError
        return serialize(pagediv)

    # text-with-links mode
    if mode == "text_dict":
        # check all a-tags
        for l in pagediv.findall(".//a"):
            href = l.get('href')

            if href:
                # is link with href
                if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
                    # is pollux link: change href and add target
                    l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql', '%s/head_main_voc' % selfurl))
                    l.set('target', '_blank')

                if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
                    # lemma link: open in a popup window
                    l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql', '%s/head_main_lemma' % selfurl))
                    l.set('target', '_blank')
                    l.set('onclick', "popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
                    l.set('ondblclick', 'popupWin.focus();')

                if href.startswith('#note-'):
                    l.set('href', href.replace('#note-', "?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-" % (url, tocMode, tocPN, pn)))

        return serialize(pagediv)

    return "no text here"
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
431 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getOrigPages(self, docinfo=None, pageinfo=None):
    # Fetches the fragment of page pageinfo['current'] of the document
    # docinfo['textURLPath'] from the text server and returns the text of its
    # first <div class="pageNumberOrig">, which is also stored in
    # docinfo['pageNumberOrig'].  Returns None and leaves docinfo untouched
    # when the fragment contains no such div.
    #
    # NOTE(review): documentation is kept in comments on purpose -- the
    # original has no docstring, and Zope 2 appears to publish only objects
    # that have one; confirm before converting this into a docstring.
    docpath = docinfo['textURLPath']
    pn = pageinfo['current']
    pagexml = self.getServerData("page-fragment.xql", "document=%s&pn=%s" % (docpath, pn))
    dom = Parse(pagexml)
    # evaluate the XPath once; the former second evaluation was only
    # compared with the first one and was therefore always equal
    pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
    if not pagedivs:
        return None
    docinfo['pageNumberOrig'] = getTextFromNode(pagedivs[0])
    return docinfo['pageNumberOrig']
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
443 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
    # Fetches the fragment of page pageinfo['current'] of the document
    # docinfo['textURLPath'] from the text server and returns the text of its
    # first <div class="pageNumberOrigNorm">, which is also stored in
    # docinfo['pageNumberOrigNorm'].  Returns None and leaves docinfo
    # untouched when the fragment contains no such div.
    #
    # NOTE(review): documentation is kept in comments on purpose -- the
    # original has no docstring, and Zope 2 appears to publish only objects
    # that have one; confirm before converting this into a docstring.
    docpath = docinfo['textURLPath']
    pn = pageinfo['current']
    pagexml = self.getServerData("page-fragment.xql", "document=%s&pn=%s" % (docpath, pn))
    dom = Parse(pagexml)
    # evaluate the XPath once; the former second evaluation was only
    # compared with the first one and was therefore always equal
    pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
    if not pagedivs:
        return None
    docinfo['pageNumberOrigNorm'] = getTextFromNode(pagedivs[0])
    return docinfo['pageNumberOrigNorm']
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
455 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
456 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getTranslate(self, word=None, language=None):
    """Return the server's word information (HTML) for translating a word.

    Sends ``language`` and the URL-quoted ``word`` to "lt/wordInfo.xql" and
    hands back the server response unchanged.
    """
    params = "language=%s&word=%s&output=html" % (str(language), urllib.quote(word))
    return self.getServerData("lt/wordInfo.xql", params)
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
462 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getLemma(self, lemma=None, language=None):
    """Return the server's list of similar words (HTML) for a lemma.

    Sends ``language`` and the URL-quoted ``lemma`` to "lt/lemma.xql" and
    hands back the server response unchanged.
    """
    params = "language=%s&lemma=%s&output=html" % (str(language), urllib.quote(lemma))
    return self.getServerData("lt/lemma.xql", params)
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
467 |
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getLemmaQuery(self, query=None, language=None):
    """Return the server's list of similar words (HTML) for a query.

    Sends ``language`` and the URL-quoted ``query`` to "lt/lemma.xql" and
    hands back the server response unchanged.
    """
    params = "language=%s&query=%s&output=html" % (str(language), urllib.quote(query))
    return self.getServerData("lt/lemma.xql", params)
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
472 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getLex(self, query=None, language=None):
    # Sends language and the URL-quoted query (with an empty document
    # parameter) to "lt/lex.xql" and returns the server response unchanged.
    # Kept as a comment: the original method has no docstring.
    params = "document=&language=%s&query=%s" % (str(language), urllib.quote(query))
    return self.getServerData("lt/lex.xql", params)
175 | 477 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
    # Return a page count for a search in the document's text:
    # (number of hits reported by the server / 10) + 1.
    #
    # docinfo  -- must contain 'textURLPath'
    # pageinfo -- must contain 'queryPageSize', 'searchPN', 'query' and
    #             'queryType'
    # query, queryType, pn -- accepted for compatibility; the values found
    #             in pageinfo replace them below.
    #
    # NOTE(review): documented with comments on purpose. The original has
    # no docstring, and Zope's publisher presumably uses the presence of a
    # docstring to decide whether a method may be called through the web --
    # confirm before turning this into a docstring.
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo['searchPN']
    query = pageinfo['query']
    queryType = pageinfo['queryType']
    # stays 0 when the answer contains no hit counter
    tocSearch = 0

    pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
    pagedom = Parse(pagexml)
    numdivs = pagedom.xpath("//div[@class='queryResultHits']")
    # Same guard as getToc(): without it a response lacking the
    # queryResultHits div raised IndexError on numdivs[0].
    if len(numdivs) > 0:
        tocSearch = int(getTextFromNode(numdivs[0]))
    # NOTE(review): the divisor is a literal 10, not `pagesize`, and an exact
    # multiple of 10 yields one extra page -- left unchanged, confirm with
    # the callers whether that is intended.
    tc = int((tocSearch/10)+1)
    return tc
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
494 |
129
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getToc(self, mode="text", docinfo=None):
    """Fetch the size of the table of contents and cache it in docinfo.

    The size is stored under the key 'tocSize_<mode>'.  Nothing is
    requested when mode is "none" or when that key is already present.
    Mode "text" is sent to the server as queryType "toc"; any other mode
    is sent as is.  Returns docinfo.
    """
    cache_key = 'tocSize_%s' % mode
    if mode == "none" or cache_key in docinfo:
        # nothing to do, or already cached
        return docinfo

    docpath = docinfo['textURLPath']
    queryType = "toc" if mode == "text" else mode
    # the query needs a result set size; ask for page 1 of 1000 entries
    request = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (docpath, queryType, 1000, 1)
    pagexml = self.getServerData("doc-query.xql", request)

    # the number of entries is the text of the queryResultHits div
    hit_nodes = Parse(pagexml).xpath("//div[@class='queryResultHits']")
    entries = int(getTextFromNode(hit_nodes[0])) if len(hit_nodes) > 0 else 0
    docinfo[cache_key] = entries
    return docinfo
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
525 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
    """Return one page of the table of contents with rewritten links.

    The page is requested from "doc-query.xql" (queryType "toc" for mode
    "text", otherwise the mode itself) using pageinfo['tocPageSize'] and
    pageinfo['tocPN']; the `pn` argument is not used.  In the answer,
    links to 'page-fragment.xql?document=<textURLPath>' are replaced by
    links to this object's absolute_url() and 'mode=image' becomes
    'mode=texttool'.
    """
    # TODO: this should use the cached TOC
    queryType = "toc" if mode == "text" else mode
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['tocPageSize']
    tocPN = pageinfo['tocPN']
    docurl = docinfo['url']
    selfurl = self.absolute_url()
    viewMode = pageinfo['viewMode']
    # Not used below (the request always sends regPlusNorm); the lookup is
    # kept so that a pageinfo without this key fails exactly as before.
    characterNormalization = pageinfo['characterNormalization']
    tocMode = pageinfo['tocMode']

    request = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm" % (docpath, queryType, pagesize, tocPN)
    data = self.getServerData("doc-query.xql", request)
    server_link = 'page-fragment.xql?document=%s' % str(docpath)
    viewer_link = '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s' % (selfurl, docurl, viewMode, tocMode, tocPN)
    return data.replace(server_link, viewer_link).replace('mode=image', 'mode=texttool')
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
549 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Store new settings on this object.

    Sets self.title, self.timeout and self.serverUrl from the arguments.
    When a RESPONSE is given it is redirected to 'manage_main'.
    """
    # alternative default serverUrl seen in a commented-out signature:
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    self.title, self.timeout, self.serverUrl = title, timeout, serverUrl
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
558 |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
559 # management methods |
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
def manage_addMpdlXmlTextServerForm(self):
    """Render the add form.

    Loads the page template "zpt/manage_addMpdlXmlTextServer", wraps it
    in the context of `self` and returns the result of calling it.
    """
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    return template.__of__(self)()
9404b6c37920
more modular version with separate object MpdlXmlTextServer
casties
parents:
diff
changeset
|
564 |
453
beb7ccb92564
first version using elementtree instead of 4suite xml
casties
parents:
407
diff
changeset
|
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create an MpdlXmlTextServer and add it to the destination.

    The new object is built from id, title, serverUrl and timeout and
    stored under `id` with self.Destination()._setObject().  When a
    RESPONSE is given it is redirected to 'manage_main'.
    """
    # alternative default serverUrl seen in a commented-out signature:
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    self.Destination()._setObject(id, server)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')