comparison software/eXist/webapp/mpdl/pq.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
comparison
equal deleted inserted replaced
6:2396a569e446 7:5589d865af7a
1 xquery version "1.0";
2
3 import module namespace mpdl-time = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/util/time" at "util/time.xql";
4 import module namespace functx = "http://www.functx.com" at "util/functx.xql";
5 import module namespace mpdl-lucene = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/lucene/search" at "lucene/search.xql";
6 import module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text" at "text/all.xql";
7
8 declare namespace xlink="http://www.w3.org/1999/xlink";
9 declare namespace request = "http://exist-db.org/xquery/request";
10 declare namespace transform = "http://exist-db.org/xquery/transform";
11 declare namespace util = "http://exist-db.org/xquery/util";
12
13 declare namespace dcterms="http://purl.org/dc/terms";
14 declare namespace xhtml="http://www.w3.org/1999/xhtml";
15 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
16
17
18 let $mpdlDocUri := request:get-parameter("document", "") (: "document" request parameter; "" when absent :)
19 let $queryType := request:get-parameter("query-type", "") (: selects the search/evaluation branch further below :)
20 let $mode := request:get-parameter("mode", "image") (: view mode; "image" when absent :)
21 let $reqPN := number(request:get-parameter("pn", "-1")) (: requested page number; -1 stands for "not given" :)
22 let $reqPF := request:get-parameter("pf", "") (: when non-empty it is used verbatim as the image file name :)
23 let $reqSN := number(request:get-parameter("sn", "-1")) (: requested sentence number; -1 stands for "not given" :)
24 let $query := request:get-parameter("query", "")
25 let $reqQueryResultPN := request:get-parameter("query-result-pn", "")
26 let $queryResultPN := (: result page to show; a missing value or "0" selects page 1 :)
27 if ($reqQueryResultPN = ('', '0'))
28 then 1
29 else number($reqQueryResultPN)
30 let $charNormValues := request:get-parameter("characterNormalization", "")
31 let $charNormJoined := string-join($charNormValues, ',') (: several parameter values become one comma-separated string :)
32 let $charNorm := (: "regPlusNorm" is shorthand for the list "reg,norm" :)
33 if ($charNormJoined != "regPlusNorm")
34 then $charNormJoined
35 else "reg,norm"
36 let $reqExport := request:get-parameter("export", "") (: "pdf" switches to PDF export :)
37 let $options := string(request:get-parameter("options", "")) (: searched with contains() for "withoutLBs" and "withXmlNodeId" :)
38
39 let $presentationPath := "/db/mpdl/presentation" (: base path of the XSL stylesheets chosen further below :)
40 (: docbase, language and document name are derived from mpdlDocUri, e.g. /archimedes/la/yourDoc.xml gives "archimedes", "la" and "yourDoc" :)
41 let $documentName := substring-before(substring-after(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/"), ".") (: text after the third "/" up to the first "." :)
42 let $language := substring-before(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/") (: text between the second and third "/" :)
43 let $docbase := substring-before(substring-after($mpdlDocUri, "/"), "/") (: text between the first and second "/" :)
44 let $fullDocumentUri := (: database path of the document; only 'fulltext' and 'ftIndex' read below "standard" :)
45 if ($queryType = 'fulltext' or $queryType = 'ftIndex')
46 then concat('/db/mpdl/documents/standard', $mpdlDocUri)
47 else if ($queryType = 'fulltextMorph' or $queryType = 'fulltextMorphLemma' or $queryType = 'ftIndexMorph')
48 then concat('/db/mpdl/documents/morph', $mpdlDocUri)
49 else concat('/db/mpdl/documents/morph', $mpdlDocUri) (: same path as the branch above: every other query type also reads below "morph" :)
50 let $currentTimeBegin := util:system-time() (: start timestamp for the <performance> value :)
51 let $documentAvailable := doc-available($fullDocumentUri) (: a false value later yields error code 10 :)
52 let $document := doc($fullDocumentUri)
53 let $metadata := (: metadata subtree; where it lives depends on the docbase :)
54 if ($docbase = 'archimedes')
55 then $document/archimedes/info
56 else if ($docbase = 'echo')
57 then $document/echo:echo/echo:metadata
58 else '' (: unknown docbase: an empty string instead of a node :)
59
60 let $pageBreaks := (: page-break elements; echo documents use the echo namespace, everything else plain pb :)
61 if ($docbase = 'archimedes')
62 then $document//pb
63 else if ($docbase = 'echo')
64 then $document//echo:pb
65 else $document//pb
66 let $countPagesTemp := count($pageBreaks) (: raw number of page breaks; 0 is handled specially by the error-code logic :)
67 let $countPages := (: page count reported to the client, never less than 1 :)
68 if ($countPagesTemp > 0)
69 then $countPagesTemp
70 else 1
71
72 (: XQuery inline execution does not work inside a module, so it has to be done here. NOTE(review): $query is a request parameter that util:eval-inline runs as code; restrict access to this script accordingly. :)
73 let $xQueryPageSize := 100 (: hits per result page :)
74 let $xQueryResultEval := (: complete hit sequence of the user-supplied XPath/XQuery, or () for other query types and empty queries :)
75 if (($queryType = 'xpath' or $queryType = 'xquery') and $query != "") (: the parentheses are required: "and" binds tighter than "or" :)
76 then util:eval-inline($document, $query)
77 else ()
78 let $xQueryFrom := ($queryResultPN * $xQueryPageSize) - $xQueryPageSize + 1 (: 1-based position of the first hit on the requested page :)
79 let $xQueryTo := $queryResultPN * $xQueryPageSize (: position of the last hit on the requested page :)
80 let $xQueryResultEntries := (: the slice of hits that belongs to the requested page :)
81 for $entry at $pos in $xQueryResultEval
82 where $pos >= $xQueryFrom and $pos <= $xQueryTo
83 return $entry
84 let $xQuerySize := count($xQueryResultEval)
85 let $xQueryPages := (: number of result pages; 0 when there are no hits :)
86 if ($xQuerySize = 0)
87 then 0
88 else ($xQuerySize + $xQueryPageSize - 1) idiv $xQueryPageSize (: ceiling division: exactly 100 hits are 1 page, 101 hits are 2 :)
89 let $xQueryResult :=
90 <result>
91 <size>{$xQuerySize}</size>
92 <page-size>{$xQueryPageSize}</page-size>
93 <pages>{$xQueryPages}</pages>
94 <pn>{$queryResultPN}</pn>
95 <hits>{$xQueryResultEntries}</hits>
96 </result>
97
98 let $queryResult := (: result of the requested search; every branch except table of contents/figures needs a non-empty query :)
99 if (($queryType = 'fulltext' or $queryType = 'fulltextMorph' or $queryType = 'fulltextMorphLemma') and $query != "")
100 then mpdl-lucene:search($docbase, $language, $document, $queryType, $query, $queryResultPN, 10)
101 else if (($queryType = 'ftIndex' or $queryType = 'ftIndexMorph') and $query != "")
102 then mpdl-text:indexTerms($docbase, $language, $document, $query, $queryResultPN, 100)
103 else if (($queryType = 'xpath' or $queryType = 'xquery') and $query != "") (: the parentheses are required: "and" binds tighter than "or", and an empty xpath query must fall through to the empty result below :)
104 then $xQueryResult
105 else if ($queryType = 'toc' or $queryType = 'figures')
106 then mpdl-text:get-toc($docbase, $queryType, $document, $queryResultPN, 100)
107 else if ($query = "")
108 then
109 <result>
110 <size>0</size>
111 <pages>0</pages>
112 <pn>0</pn>
113 <hits/>
114 </result>
115 else () (: non-empty query with an unknown query type: no result element at all :)
116
117 let $hitCount := count($queryResult/result/hits/hit) (: NOTE(review): this path expects a "result" child below $queryResult; confirm against what the search functions return :)
118 let $leadingHit := $queryResult/result/hits/hit[1]
119 (: in the fulltext query types, jump to the page and sentence of the first hit unless the request names a page :)
120 let $pn := (: effective page number :)
121 if ($queryType = ('fulltext', 'fulltextMorph', 'fulltextMorphLemma') and $hitCount > 0 and $reqPN <= 0)
122 then number($leadingHit/pn)
123 else if ($reqPN = -1) (: no page requested: start at page 1 :)
124 then 1
125 else $reqPN
126 let $sn := (: effective sentence number; taken from the first hit only when neither page nor sentence was requested :)
127 if ($queryType = ('fulltext', 'fulltextMorph', 'fulltextMorphLemma') and $hitCount > 0 and $reqPN <= 0 and $reqSN < 0)
128 then number($leadingHit/pos-of-s)
129 else $reqSN
130
131 (: error codes: 0 = ok, 1 = document without page breaks (not an error), 10 and above = error reported to the client :)
132 let $errorCode :=
133 if (not($documentAvailable))
134 then 10 (: document is not in the database :)
135 else if ($countPagesTemp != 0 and ($pn > $countPagesTemp or $pn <= 0))
136 then 11 (: requested page is outside 1..page count :)
137 else if ($countPagesTemp = 0)
138 then 1 (: if no page break is found then the document should have exactly one page :)
139 else if (not($mode = ("text", "textPollux", "gis", "image", "xml", "pureXml")))
140 then 12 (: unknown view mode :)
141 else 0
142
143 let $pb1 := (: start boundary of the page: the pn-th page break, or the first item of getText when there are no page breaks :)
144 if ($errorCode = 0)
145 then subsequence($pageBreaks, $pn, 1)
146 else if ($errorCode = 1)
147 then subsequence(mpdl-lucene:getText($docbase, $document), 1, 1)
148 else ()
149 let $pb2 := (: end boundary of the page: the item following $pb1 in the same sequence :)
150 if ($errorCode = 0)
151 then subsequence($pageBreaks, $pn + 1, 1)
152 else if ($errorCode = 1)
153 then subsequence(mpdl-lucene:getText($docbase, $document), 2, 1)
154 else ()
155 let $pageHeader := string($pb1/@rhead) (: value of the rhead attribute of the start boundary :)
156 let $pageNumberOrig := string($pb1/@o) (: value of the o attribute, reported as <number-orig> :)
157
158 let $documentIdentifier := (: identifier element from the metadata; archimedes uses locator, everything else dcterms:identifier :)
159 if ($docbase = 'archimedes')
160 then $metadata/locator
161 else if ($docbase = 'echo')
162 then $metadata/dcterms:identifier
163 else $metadata/dcterms:identifier
164 let $echoDocIdentifier := (: text between "ECHO:" and the next "." of the identifier, or '' :)
165 if ($documentIdentifier != '')
166 then substring-before(substring-after($documentIdentifier, "ECHO:"), ".")
167 else ''
168 let $echoURLZogilib := "http://echo.mpiwg-berlin.mpg.de/zogilib"
169 let $nausikaaURLScaler := "http://nausikaa2.rz-berlin.mpg.de/digitallibrary/servlet/Scaler"
170 let $nausikaaURLDlInfo := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dlInfo-xml.jsp"
171 let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter"
172 let $echoImageDir := (: explicit image directory from the metadata, if the document declares one :)
173 if ($docbase = 'archimedes')
174 then string($metadata/echodir)
175 else if ($docbase = 'echo')
176 then string($metadata/echo:echodir)
177 else ''
178 let $imagesDocDirectory := (: image directory of the document: the declared one, else a per-docbase default path :)
179 if ($echoImageDir != '')
180 then $echoImageDir
181 else if ($docbase = 'archimedes')
182 then concat("/permanent/archimedes/", $documentName)
183 else if ($docbase = 'echo')
184 then concat("/permanent/library/", $echoDocIdentifier)
185 else ''
186 let $imagesDocDirectoryIndexMetaUrl := (: URL of the directory's index.meta; not built for the xml and pureXml modes :)
187 if ($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis")
188 then concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta")
189 else ()
190 let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, ()) (: NOTE(review): the mpdldoc prefix is not declared in the visible prolog; presumably a pre-registered module, confirm. Also called with () in the xml modes. :)
191 let $imagesDocDirectoryIndexMeta := (: the index.meta document, fetched only when the check above succeeded :)
192 if (($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") and $digilibAvailable)
193 then doc($imagesDocDirectoryIndexMetaUrl)
194 else ()
195 let $pageImageDirectory := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/image)
196 let $figuresImageDirectoryTemp := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/figures)
197 let $figuresImageDirectory := (: declared figures directory, else the page image directory with "pageimg" and what follows replaced by "figures" :)
198 if ($figuresImageDirectoryTemp != '')
199 then $figuresImageDirectoryTemp
200 else concat(substring-before($pageImageDirectory, "pageimg"), "figures")
201 let $pageImageFileNameWithoutExtension := (: only echo documents name the image file on the page break (file attribute) :)
202 if ($docbase = 'echo')
203 then concat("/", string($pb1/@file))
204 else ''
205 let $imageFileName := (: a "pf" request parameter overrides the computed image path :)
206 if ($reqPF = '')
207 then concat($imagesDocDirectory, "/", $pageImageDirectory, $pageImageFileNameWithoutExtension)
208 else $reqPF
209 let $imageEcho := <image-echo>{$echoURLZogilib}?fn={$imageFileName}&amp;pn={$pn}</image-echo>
210 let $imageScaler := <image-scaler>{$nausikaaURLScaler}?fn={$imageFileName}&amp;pn={$pn}</image-scaler>
211
212 let $imageFileNameUrl := concat($nausikaaURLDlInfo, "?fn=", $imageFileName)
213 let $testImageResult := (: dlInfo answer for the image; requested only in image mode :)
214 if ($mode = 'image' and $digilibAvailable)
215 then doc($imageFileNameUrl)
216 else ()
217 let $testImageResultParamImgFn := string($testImageResult//parameter[@name='img.fn']/@value)
218 let $imageIsAvailable := (: the strings 'true'/'false', not booleans; 'false' only when dlInfo named no file and no "pf" was given :)
219 if ($testImageResultParamImgFn = '' and $reqPF = '')
220 then 'false'
221 else 'true'
222
223 let $positionOfFirstFigureAfterPB1 := (: 1-based index, among all figures, of the first figure following the page start :)
224 if ($docbase = 'archimedes')
225 then count($pb1/following::figure[1]/preceding::figure) + 1
226 else if ($docbase = 'echo')
227 then count($pb1/following::echo:figure[1]/preceding::echo:figure) + 1
228 else ()
229
230 let $pageFragmentTmp := (: content between the two page boundaries; nothing in image mode or on error :)
231 if ($mode = "image" or $errorCode > 9)
232 then ()
233 else if ($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml")
234 then util:get-fragment-between($pb1, $pb2, true())
235 else ()
236 (: replace the soft hyphen (Unicode character 00AD) just before the line break by a normal hyphen :)
237 (: with options=withoutLBs, delete the hyphen just before the line break. NOTE(review): that branch matches a normal hyphen, not the soft hyphen; confirm this is intended :)
238 let $pageFragment :=
239 if (($mode = "text" or $mode = "textPollux") and not(contains($options, "withoutLBs")) and contains($pageFragmentTmp, "­<lb"))
240 then replace($pageFragmentTmp, "­<lb", "-<lb")
241 else if (($mode = "text" or $mode = "textPollux") and contains($options, "withoutLBs") and contains($pageFragmentTmp, "-<lb"))
242 then replace($pageFragmentTmp, "-<lb", "<lb")
243 else $pageFragmentTmp
244 let $pageFragmentNormalized := (: character normalisation: the text-like modes default to 'reg,norm', the xml modes stay untouched unless a normalisation was requested :)
245 if ($mode = "image" or $errorCode > 9)
246 then ()
247 else if (($mode = "text" or $mode = "textPollux" or $mode = "gis") and $charNorm = "")
248 then mpdltext:normalizeChars('reg,norm', $language, $pageFragment)
249 else if (($mode = "xml" or $mode = "pureXml") and $charNorm = "")
250 then $pageFragment
251 else if (($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") and $charNorm != "")
252 then mpdltext:normalizeChars($charNorm, $language, $pageFragment)
253 else ()
254 let $retPageFragment := (: textPollux additionally passes the fragment through mpdltext:dictionarize :)
255 if ($mode = "image" or $errorCode > 9)
256 then ()
257 else if ($mode = "text" or $mode = "gis" or $mode = "xml" or $mode = "pureXml")
258 then $pageFragmentNormalized
259 else if ($mode = "textPollux")
260 then mpdltext:dictionarize($pageFragmentNormalized, $language)
261 else ()
262 let $returnPageFragmentTmp := util:parse($retPageFragment) (: returns a valid xml document for that string :)
263
264 let $externalElementsTmpTmp := mpdltext:externalObject("read", "element", "", $mpdlDocUri, string($pn), "", "", "") (: serialized external elements stored for this document and page; "" when there are none :)
265 let $externalElementsTmp :=
266 if(not($externalElementsTmpTmp = ""))
267 then util:parse($externalElementsTmpTmp)
268 else ()
269 let $externalElements := $externalElementsTmp/result/element
270 let $containsExternalElements := exists($externalElements)
274 (: let $bla := error(QName("Bla", "Bla"), util:serialize($externalElementsTmp, ())) :)
275 let $returnPageFragmentTmpp := (: node ids are added on request (withXmlNodeId) or when external elements are present :)
276 if (contains($options, "withXmlNodeId") or $containsExternalElements)
277 then mpdl-text:insertNodeIdAttribute($returnPageFragmentTmp/*[1])
278 else $returnPageFragmentTmp
285
286 let $returnPageFragment := (: final page content, with the external elements merged in when there are any :)
287 if($containsExternalElements)
288 then mpdl-text:insert($returnPageFragmentTmpp/*[1], $externalElements)
289 else $returnPageFragmentTmpp
290
291 let $pageFigureAnchors := $returnPageFragment//anchor[@type = 'figure'] (: anchors on the page that point at figures :)
292 let $pageFigures := (: for each anchor, the first echo:figure in the document whose xlink:label equals the anchor's xlink:href :)
293 for $pageFigureAnchor in $pageFigureAnchors
294 let $figureHref := string($pageFigureAnchor/@xlink:href)
295 let $pageFigureTmp := $document//echo:figure[@xlink:label = $figureHref]
296 let $pageFigure := subsequence($pageFigureTmp, 1, 1)
297 return
298 $pageFigure
299 let $pageHandwrittenAnchors := $returnPageFragment//anchor[@type = 'handwritten']
300 let $pageHandwritten := (: same lookup for echo:handwritten elements :)
301 for $pageHandwrittenAnchor in $pageHandwrittenAnchors
302 let $handwrittenHref := string($pageHandwrittenAnchor/@xlink:href)
303 let $pageHandwrittenTmp := $document//echo:handwritten[@xlink:label = $handwrittenHref]
304 let $pageHandwritten := subsequence($pageHandwrittenTmp, 1, 1) (: loop-local variable that reuses the name of the variable being defined :)
305 return
306 $pageHandwritten
307 let $pageTableAnchors := $returnPageFragment//anchor[@type = 'table']
308 let $pageTables := (: same lookup for xhtml:table elements :)
309 for $pageTableAnchor in $pageTableAnchors
310 let $tableHref := string($pageTableAnchor/@xlink:href)
311 let $pageTableTmp := $document//xhtml:table[@xlink:label = $tableHref]
312 let $pageTable := subsequence($pageTableTmp, 1, 1)
313 return
314 $pageTable
315 let $pageNoteAnchors := $returnPageFragment//anchor[@type = 'note']
316 let $pageNotes := (: echo documents resolve notes through anchors; other docbases take the note elements found in the page fragment :)
317 if ($docbase = "echo")
318 then
319 for $pageNoteAnchor in $pageNoteAnchors
320 let $noteHref := string($pageNoteAnchor/@xlink:href)
321 let $pageNoteTmp := $document//echo:note[@xlink:label = $noteHref]
322 let $pageNote := subsequence($pageNoteTmp, 1, 1)
323 return
324 $pageNote
325 else
326 $returnPageFragment//note
327
328 (: Metadata handling: only the metadata of the selected document is scanned. Each value below is copied into <document-description>. :)
329 let $identifier := $documentIdentifier
330 let $authors := mpdl-lucene:getElementsByAttr($metadata, $docbase, "author")
331 let $titles := mpdl-lucene:getElementsByAttr($metadata, $docbase, "title")
332 let $places := mpdl-lucene:getElementsByAttr($metadata, $docbase, "place")
333 let $date := mpdl-lucene:getElementsByAttr($metadata, $docbase, "date")
334 let $rights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "rights")
335 let $accessRights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "accessRights")
336 let $licenses := mpdl-lucene:getElementsByAttr($metadata, $docbase, "license")
337 let $file := mpdl-lucene:getElementsByAttr($metadata, $docbase, "file")
338 let $translator := mpdl-lucene:getElementsByAttr($metadata, $docbase, "translator")
339 let $version := mpdl-lucene:getElementsByAttr($metadata, $docbase, "version")
340
341 let $currentTimeEnd := util:system-time() (: end timestamp; rendering and PDF export happen after this point and are not included :)
342 let $neededTime := mpdl-time:duration-as-ms($currentTimeEnd - $currentTimeBegin) (: elapsed time reported as <performance> :)
343
344 let $xmlResult := (: intermediate XML handed to the stylesheet: a <result> for codes below 10, otherwise an <error>. NOTE(review): <xml-url> is built from $documentName, not from the full document URI; confirm the consumer expects that. :)
345 if ($errorCode < 10)
346 then
347 <result>
348 <document-description>
349 <uri>{$mpdlDocUri}</uri>
350 <collection-name>{$docbase}</collection-name>
351 <document-name>{$documentName}</document-name>
352 <language>{$language}</language>
353 <authors>{$authors}</authors>
354 <titles>{$titles}</titles>
355 <places>{$places}</places>
356 <date>{$date}</date>
357 <identifier>{$identifier}</identifier>
358 <rights>{$rights}</rights>
359 <accessRights>{$accessRights}</accessRights>
360 <licenses>{$licenses}</licenses>
361 <file>{$file}</file>
362 <translator>{$translator}</translator>
363 <version>{$version}</version>
364 <count-pages>{$countPages}</count-pages>
365 </document-description>
366 <page>
367 <mode>{$mode}</mode>
368 <number>{$pn}</number>
369 <sentence-number>{$sn}</sentence-number>
370 <header>{$pageHeader}</header>
371 <number-orig>{$pageNumberOrig}</number-orig>
372 <digilib-available>{$digilibAvailable}</digilib-available>
373 <image-available>{$imageIsAvailable}</image-available>
374 <image-file-name>{$imageFileName}</image-file-name>
375 {$imageEcho}
376 {$imageScaler}
377 <xml-url>?document={$documentName}&amp;pn={$pn}&amp;mode=xml</xml-url>
378 <page-image-directory>{$imagesDocDirectory}/{$pageImageDirectory}</page-image-directory>
379 <figures-image-directory>{$imagesDocDirectory}/{$figuresImageDirectory}</figures-image-directory>
380 <firstFigurePosition>{$positionOfFirstFigureAfterPB1}</firstFigurePosition>
381 <figures>{$pageFigures}</figures>
382 <handwritten>{$pageHandwritten}</handwritten>
383 <tables>{$pageTables}</tables>
384 <notes>{$pageNotes}</notes>
385 <content>{$returnPageFragment}</content>
386 <character-normalization>{$charNorm}</character-normalization>
387 <options>{$options}</options>
388 </page>
389 <query>
390 <type>{$queryType}</type>
391 <expression>{$query}</expression>
392 {$queryResult}
393 </query>
394 <performance>{$neededTime}</performance>
395 </result>
396 else if ($errorCode = 10) (: document not available :)
397 then <error><number>{$errorCode}</number><description>Fulltext document: {$mpdlDocUri} is not available yet</description></error>
398 else if ($errorCode = 11) (: page number out of range :)
399 then <error><number>{$errorCode}</number><description>No result: Page {$pn} not found</description></error>
400 else if ($errorCode = 12) (: unknown view mode :)
401 then <error><number>{$errorCode}</number><description>View mode {$mode} not available</description></error>
402 else <error><number>{$errorCode}</number><description>undefined error: {$errorCode}</description></error>
403
404 let $declare := (: serialization options: XHTML for errors and the HTML view modes, XML otherwise :)
405 if ($errorCode > 9 or $mode = ("text", "textPollux", "gis", "image", "xml"))
406 then util:declare-option("exist:serialize", "method=xhtml media-type=text/html omit-xml-declaration=no indent=yes encoding=utf-8")
407 else if ($mode = "pureXml")
408 then util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")
409 else util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")
410 let $xslFilePath := concat($presentationPath, (: stylesheet below the presentation path; PDF export takes precedence over the view mode :)
411 if ($reqExport = "pdf")
412 then "/pageFragmentHtml.xsl"
413 else if ($mode = ("text", "textPollux", "gis", "image", "xml"))
414 then "/pageHtml.xsl"
415 else "/pageXml.xsl")
416
417 let $titleStr := concat(string-join($authors, ', '), ". ", string-join($titles, ', '), ". ", string-join($places, ', '), " ", $date, ".") (: "authors. titles. places date." line passed to the PDF export :)
418 let $rendered := (: PDF bytes, transformed page, or the raw error XML wrapped in a div :)
419 if ($errorCode < 10 and $reqExport = "pdf")
420 then mpdl-text:html2pdf($language, $xmlResult, $xslFilePath, $titleStr, $pn, $mode)
421 else if ($errorCode < 10 and not($reqExport = "pdf"))
422 then mpdl-text:transform($xmlResult, $xslFilePath)
423 else
424 <div>{$xmlResult}</div> (: error xml result :)
425 let $result := (: a PDF is streamed as a binary response named "<document>-page<pn>.pdf"; everything else is returned as is :)
426 if ($errorCode < 10 and $reqExport = "pdf")
427 then response:stream-binary($rendered, "application/pdf", concat($documentName, "-page", $pn, ".pdf"))
428 else $rendered
429
430 let $setHeaderXmlFilename := (: pureXml responses get a download file name; the variable itself is never read and exists only for the header side effect :)
431 if ($mode = "pureXml" and $queryType = "xpath" and $query != "")
432 then response:set-header('Content-Disposition', concat('filename=', encode-for-uri($documentName), '-xpath-result--', encode-for-uri($query), '--')) (: request-derived values are percent-encoded so they cannot inject header syntax or line breaks :)
433 else if ($mode = "pureXml" and $queryType = "xquery" and $query != "")
434 then response:set-header('Content-Disposition', concat('filename=', 'xquery-result'))
435 else if ($mode = "pureXml")
436 then response:set-header('Content-Disposition', concat('filename=', encode-for-uri($documentName), '-page', $pn))
437 else ()
438
439 return $result