comparison software/eXist/webapp/mpdl/interface/page-fragment.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children d2a1c14fde31
comparison
equal deleted inserted replaced
6:2396a569e446 7:5589d865af7a
1 xquery version "1.0";
2
3 import module namespace mpdl-time = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/util/time" at "../util/time.xql";
4 import module namespace functx = "http://www.functx.com" at "../util/functx.xql";
5 import module namespace mpdl-lucene = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/lucene/search" at "../lucene/search.xql";
6 import module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text" at "../text/all.xql";
7
8 declare namespace xlink="http://www.w3.org/1999/xlink";
9 declare namespace request = "http://exist-db.org/xquery/request";
10 declare namespace transform = "http://exist-db.org/xquery/transform";
11 declare namespace util = "http://exist-db.org/xquery/util";
12
13 declare namespace dcterms="http://purl.org/dc/terms";
14 declare namespace xhtml="http://www.w3.org/1999/xhtml";
15 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
16
(: Read the request parameters.
   "document" is the document URI (default ""), "mode" the view mode (default "image").
   "pn" and "sn" are converted with number(); the default -1 stands for "not supplied".
   "characterNormalization" is joined with ',' in case it yields several values, and the
   shorthand "regPlusNorm" is expanded to "reg,norm". :)
17 let $mpdlDocUri := request:get-parameter("document", "")
18 let $mode := request:get-parameter("mode", "image")
19
20 let $reqPN := number(request:get-parameter("pn", "-1"))
21 let $reqPF := request:get-parameter("pf", "")
22 let $reqSN := number(request:get-parameter("sn", "-1"))
23 let $highlightQuery := request:get-parameter("highlightQuery", "")
24 let $regCharNorm := request:get-parameter("characterNormalization", "")
25 let $tmpCharNorm := string-join($regCharNorm, ',')
26 let $charNorm :=
27 if($tmpCharNorm = "regPlusNorm")
28 then "reg,norm"
29 else $tmpCharNorm
30 let $reqExport := request:get-parameter("export", "")
31 let $options := string(request:get-parameter("options", ""))
32
(: Derive the document coordinates from the request URI, open the document and pick its metadata. :)
33 let $presentationPath := "/db/mpdl/presentation"
34 (: $mpdlDocUri has the form /docbase/language/name.ext, e.g. /archimedes/la/yourDoc.xml gives docbase "archimedes", language "la" and document name "yourDoc" :)
35 let $documentName := substring-before(substring-after(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/"), ".")
36 let $language := substring-before(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/")
37 let $docbase := substring-before(substring-after($mpdlDocUri, "/"), "/")
38 let $fullDocumentUri := concat('/db/mpdl/documents/morph', $mpdlDocUri)
(: start of the time measurement that is reported in the result :)
39 let $currentTimeBegin := util:system-time()
40 let $documentAvailable := doc-available($fullDocumentUri)
(: doc() is called regardless of $documentAvailable; the availability flag is only evaluated later for the error code :)
41 let $document := doc($fullDocumentUri)
(: metadata root: archimedes/info for archimedes, echo:echo/echo:metadata for echo, otherwise the empty string :)
42 let $metadata :=
43 if ($docbase = 'archimedes')
44 then $document/archimedes/info
45 else if ($docbase = 'echo')
46 then $document/echo:echo/echo:metadata
47 else ''
48
(: Collect the page break elements: echo documents use echo:pb, every other docbase the
   un-prefixed pb. $countPages is the number of page breaks found, but never less than 1. :)
49 let $pageBreaks :=
50 if ($docbase = 'archimedes')
51 then $document//pb
52 else if ($docbase = 'echo')
53 then $document//echo:pb
54 else $document//pb
55 let $countPagesTemp := count($pageBreaks)
56 let $countPages :=
57 if ($countPagesTemp > 0)
58 then $countPagesTemp
59 else 1
60
61 (: for performance reasons: deliver count of gis places and toc/figure entries :)
(: places and toc entries (echo:div of type section or chapter) are only looked up for echo documents;
   figures are looked up for echo and archimedes; in all other cases the sequence is empty and the count is 0 :)
62 let $gisPlaces :=
63 if ($docbase = 'echo')
64 then $document//echo:place
65 else ()
66 let $countGisPlaces := count($gisPlaces)
67 let $tocEntries :=
68 if ($docbase = 'echo')
69 then $document//echo:div[@type = 'section' or @type = 'chapter']
70 else ()
71 let $figureEntries :=
72 if ($docbase = 'echo')
73 then $document//echo:figure
74 else if ($docbase = 'archimedes')
75 then $document//figure
76 else ()
77 let $countTocEntries := count($tocEntries)
78 let $countFigureEntries := count($figureEntries)
79
80 (: jump to first pn and sn hit in fulltext mode :)
(: a missing "pn" parameter (default -1) selects page 1; the sentence number is taken over unchanged :)
81 let $pn :=
82 if ($reqPN = -1)
83 then 1
84 else $reqPN
85 let $sn := $reqSN
86
87 (: Status code of the request; 10 or more is an error.
   Code 0: ok. Code 1: document without page breaks, treated as a single page.
   Code 10: document not available. Code 11: page number out of range or not a number.
   Code 12: unknown view mode.
   The range test is written as not(... and ...) so that a non-numeric "pn" is rejected too:
   number() turns it into NaN, and every comparison with NaN is false.
   The view mode is validated before the no-page-break case, so an unknown mode is also
   reported for documents that contain no page break. :)
88 let $errorCode :=
89 if (not($documentAvailable))
90 then 10
91 else if ($countPagesTemp != 0 and not($pn > 0 and $pn <= $countPagesTemp))
92 then 11
93 else if (not($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml" or $mode = "pureXml"))
94 then 12
95 else if ($countPagesTemp = 0)
96 then 1 (: if no page break is found then the document should have exactly one page :)
97 else 0
98
(: $pb1 and $pb2 delimit the requested page.
   Error code 0: the pn-th page break and the one after it.
   Error code 1 (no page breaks): the first and second item returned by mpdl-lucene:getText.
   Any other code: empty. :)
99 let $pb1 :=
100 if ($errorCode = 0)
101 then subsequence($pageBreaks, $pn, 1)
102 else if ($errorCode = 1)
103 then subsequence(mpdl-lucene:getText($docbase, $document), 1, 1)
104 else ()
105 let $pb2 :=
106 if ($errorCode = 0)
107 then subsequence($pageBreaks, $pn + 1, 1)
108 else if ($errorCode = 1)
109 then subsequence(mpdl-lucene:getText($docbase, $document), 2, 1)
110 else ()
(: running header and original page number are read from the rhead and o attributes of $pb1 :)
111 let $pageHeader := string($pb1/@rhead)
112 let $pageNumberOrig := string($pb1/@o)
113
(: Document identifier: the locator element for archimedes metadata, dcterms:identifier otherwise.
   $echoDocIdentifier is the part of the identifier between "ECHO:" and the following ".". :)
114 let $documentIdentifier :=
115 if ($docbase = 'archimedes')
116 then $metadata/locator
117 else if ($docbase = 'echo')
118 then $metadata/dcterms:identifier
119 else $metadata/dcterms:identifier
120 let $echoDocIdentifier :=
121 if ($documentIdentifier != '')
122 then substring-before(substring-after($documentIdentifier, "ECHO:"), ".")
123 else ''
(: Base URLs of the external image services used below. :)
124 let $echoURLZogilib := "http://echo.mpiwg-berlin.mpg.de/zogilib"
125 let $nausikaaURLScaler := "http://nausikaa2.rz-berlin.mpg.de/digitallibrary/servlet/Scaler"
126 let $nausikaaURLDlInfo := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dlInfo-xml.jsp"
127 let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter"
(: image directory as stated in the metadata (echodir / echo:echodir), empty string for other docbases :)
128 let $echoImageDir :=
129 if ($docbase = 'archimedes')
130 then string($metadata/echodir)
131 else if ($docbase = 'echo')
132 then string($metadata/echo:echodir)
133 else ''
(: fall back to a conventional directory when the metadata names none :)
134 let $imagesDocDirectory :=
135 if ($echoImageDir != '')
136 then $echoImageDir
137 else if ($docbase = 'archimedes')
138 then concat("/permanent/archimedes/", $documentName)
139 else if ($docbase = 'echo')
140 then concat("/permanent/library/", $echoDocIdentifier)
141 else ''
(: URL of the index.meta file of the image directory; only built for the modes image, text, textPollux and gis :)
142 let $imagesDocDirectoryIndexMetaUrl :=
143 if ($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis")
144 then concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta")
145 else ()
(: NOTE(review): the second argument of mpdldoc:check-uri is presumably a timeout in milliseconds - confirm;
   for the modes xml and pureXml the function is called with the empty sequence as URL :)
146 let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, 2000)
147 let $imagesDocDirectoryIndexMeta :=
148 if (($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") and $digilibAvailable)
149 then doc($imagesDocDirectoryIndexMetaUrl)
150 else ()
(: sub directories for page images and figures as listed in index.meta; when no figures directory is listed,
   it is built from the part of the page image directory before "pageimg" followed by "figures" :)
151 let $pageImageDirectory := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/image)
152 let $figuresImageDirectoryTemp := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/figures)
153 let $figuresImageDirectory :=
154 if ($figuresImageDirectoryTemp != '')
155 then $figuresImageDirectoryTemp
156 else concat(substring-before($pageImageDirectory, "pageimg"), "figures")
(: for echo documents the image file name is taken from the file attribute of $pb1 :)
157 let $pageImageFileNameWithoutExtension :=
158 if ($docbase = 'echo')
159 then concat("/", string($pb1/@file))
160 else ''
(: an explicit "pf" request parameter overrides the computed image file name :)
161 let $imageFileName :=
162 if ($reqPF = '')
163 then concat($imagesDocDirectory, "/", $pageImageDirectory, $pageImageFileNameWithoutExtension)
164 else $reqPF
(: result elements holding the zogilib and Scaler URLs of the page image :)
165 let $imageEcho := <image-echo>{$echoURLZogilib}?fn={$imageFileName}&amp;pn={$pn}</image-echo>
166 let $imageScaler := <image-scaler>{$nausikaaURLScaler}?fn={$imageFileName}&amp;pn={$pn}</image-scaler>
167
(: Probe the dlInfo service for the page image. The request is only made in image mode and when
   digilib is reachable. $imageIsAvailable is the string 'false' only when the response carries no
   img.fn parameter value and no "pf" request parameter was given; otherwise it is 'true'. :)
168 let $imageFileNameUrl := concat($nausikaaURLDlInfo, "?fn=", $imageFileName)
169 let $testImageResult :=
170 if ($mode = 'image' and $digilibAvailable)
171 then doc($imageFileNameUrl)
172 else ()
173 let $testImageResultParamImgFn := string($testImageResult//parameter[@name='img.fn']/@value)
174 let $imageIsAvailable :=
175 if ($testImageResultParamImgFn = '' and $reqPF = '')
176 then 'false'
177 else 'true'
178
(: Position of the first figure that follows $pb1, computed as the number of figures preceding
   that figure plus 1. When no figure follows, the count is 0 and the result is 1.
   Empty for docbases other than archimedes and echo. :)
179 let $positionOfFirstFigureAfterPB1 :=
180 if ($docbase = 'archimedes')
181 then count($pb1/following::figure[1]/preceding::figure) + 1
182 else if ($docbase = 'echo')
183 then count($pb1/following::echo:figure[1]/preceding::echo:figure) + 1
184 else ()
185
(: Raw page content between $pb1 and $pb2, fetched with util:get-fragment-between.
   Nothing is fetched in image mode or when the error code is 10 or more. :)
186 let $pageFragmentTmp :=
187 if ($mode = "image" or $errorCode > 9)
188 then ()
189 else if ($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml")
190 then util:get-fragment-between($pb1, $pb2, true())
191 else ()
192 (: replace the soft hyphen (Unicode character 00AD) just before the line break by a normal hyphen :)
193 (: delete the hyphen just before the line break in case of options=withoutLBs :)
(: The soft hyphen is written as the character reference &#xAD; instead of the raw character:
   the raw character is invisible in editors and is easily lost when the source is copied.
   The string value of the literal is the same. Only the modes text and textPollux are affected;
   every other mode passes the fragment through unchanged. :)
194 let $pageFragment :=
195 if (($mode = "text" or $mode = "textPollux") and not(contains($options, "withoutLBs")) and contains($pageFragmentTmp, "&#xAD;<lb"))
196 then replace($pageFragmentTmp, "&#xAD;<lb", "-<lb")
197 else if (($mode = "text" or $mode = "textPollux") and contains($options, "withoutLBs") and contains($pageFragmentTmp, "-<lb"))
198 then replace($pageFragmentTmp, "-<lb", "<lb")
199 else $pageFragmentTmp
(: Character normalization of the page fragment.
   Without a requested normalization, the modes text, textPollux and gis use 'reg,norm' and the
   modes xml and pureXml keep the fragment as it is. A requested normalization is applied in all
   five modes. Image mode and error codes of 10 or more yield the empty sequence. :)
200 let $pageFragmentNormalized :=
201 if ($mode = "image" or $errorCode > 9)
202 then ()
203 else if (($mode = "text" or $mode = "textPollux" or $mode = "gis") and $charNorm = "")
204 then mpdltext:normalizeChars('reg,norm', $language, $pageFragment)
205 else if (($mode = "xml" or $mode = "pureXml") and $charNorm = "")
206 then $pageFragment
207 else if (($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") and $charNorm != "")
208 then mpdltext:normalizeChars($charNorm, $language, $pageFragment)
209 else ()
(: in textPollux mode the normalized fragment is additionally passed through mpdltext:dictionarize :)
210 let $retPageFragment :=
211 if ($mode = "image" or $errorCode > 9)
212 then ()
213 else if ($mode = "text" or $mode = "gis" or $mode = "xml" or $mode = "pureXml")
214 then $pageFragmentNormalized
215 else if ($mode = "textPollux")
216 then mpdltext:dictionarize($pageFragmentNormalized, $language)
217 else ()
218 let $returnPageFragmentTmp := util:parse($retPageFragment) (: returns a valid xml document for that string :)
219
(: External elements stored for this document and page: read through mpdltext:externalObject,
   parsed when the returned value is not the empty string, and taken from result/element. :)
220 let $externalElementsTmpTmp := mpdltext:externalObject("read", "element", "", $mpdlDocUri, string($pn), "", "", "")
221 let $externalElementsTmp :=
222 if(not($externalElementsTmpTmp = ""))
223 then util:parse($externalElementsTmpTmp)
224 else ()
225 let $externalElements := $externalElementsTmp/result/element
226 let $containsExternalElements :=
227 if(not(empty($externalElements)))
228 then true()
229 else false()
(: node id attributes are inserted when the option "withXmlNodeId" is set or external elements exist :)
230 let $returnPageFragmentTmpp :=
231 if (contains($options, "withXmlNodeId") or $containsExternalElements)
232 then mpdl-text:insertNodeIdAttribute($returnPageFragmentTmp/*[1])
233 else $returnPageFragmentTmp
234
(: external elements, if any, are inserted into the page fragment :)
235 let $returnPageFragment :=
236 if($containsExternalElements)
237 then mpdl-text:insert($returnPageFragmentTmpp/*[1], $externalElements)
238 else $returnPageFragmentTmpp
239
(: Resolve the anchors of the page fragment to the elements they point to. For every anchor of a
   given type the xlink:href value is matched against the xlink:label attribute of the target
   elements in the whole document, and the first match is kept. :)
(: figures: anchors of type 'figure' resolve to echo:figure :)
240 let $pageFigureAnchors := $returnPageFragment//anchor[@type = 'figure']
241 let $pageFigures :=
242 for $pageFigureAnchor in $pageFigureAnchors
243 let $href := string($pageFigureAnchor/@xlink:href)
244 let $pageFigureTmp := $document//echo:figure[@xlink:label = $href]
245 let $pageFigure := subsequence($pageFigureTmp, 1, 1)
246 return
247 $pageFigure
(: handwritten parts: anchors of type 'handwritten' resolve to echo:handwritten :)
248 let $pageHandwrittenAnchors := $returnPageFragment//anchor[@type = 'handwritten']
249 let $pageHandwritten :=
250 for $pageHandwrittenAnchor in $pageHandwrittenAnchors
251 let $handwrittenHref := string($pageHandwrittenAnchor/@xlink:href)
252 let $pageHandwrittenTmp := $document//echo:handwritten[@xlink:label = $handwrittenHref]
253 let $pageHandwritten := subsequence($pageHandwrittenTmp, 1, 1)
254 return
255 $pageHandwritten
(: tables: anchors of type 'table' resolve to xhtml:table :)
256 let $pageTableAnchors := $returnPageFragment//anchor[@type = 'table']
257 let $pageTables :=
258 for $pageTableAnchor in $pageTableAnchors
259 let $tableHref := string($pageTableAnchor/@xlink:href)
260 let $pageTableTmp := $document//xhtml:table[@xlink:label = $tableHref]
261 let $pageTable := subsequence($pageTableTmp, 1, 1)
262 return
263 $pageTable
(: notes: echo documents resolve anchors of type 'note' to echo:note;
   all other docbases take the note elements found in the page fragment itself :)
264 let $pageNoteAnchors := $returnPageFragment//anchor[@type = 'note']
265 let $pageNotes :=
266 if ($docbase = "echo")
267 then
268 for $pageNoteAnchor in $pageNoteAnchors
269 let $noteHref := string($pageNoteAnchor/@xlink:href)
270 let $pageNoteTmp := $document//echo:note[@xlink:label = $noteHref]
271 let $pageNote := subsequence($pageNoteTmp, 1, 1)
272 return
273 $pageNote
274 else
275 $returnPageFragment//note
276
277 (: Metadata handling: only metadata of the selected document is scanned :)
(: each field is looked up in $metadata through mpdl-lucene:getElementsByAttr, keyed by the docbase and a field name :)
278 let $identifier := $documentIdentifier
279 let $authors := mpdl-lucene:getElementsByAttr($metadata, $docbase, "author")
280 let $titles := mpdl-lucene:getElementsByAttr($metadata, $docbase, "title")
281 let $places := mpdl-lucene:getElementsByAttr($metadata, $docbase, "place")
282 let $date := mpdl-lucene:getElementsByAttr($metadata, $docbase, "date")
283 let $rights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "rights")
284 let $accessRights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "accessRights")
285 let $licenses := mpdl-lucene:getElementsByAttr($metadata, $docbase, "license")
286 let $file := mpdl-lucene:getElementsByAttr($metadata, $docbase, "file")
287 let $translator := mpdl-lucene:getElementsByAttr($metadata, $docbase, "translator")
288 let $version := mpdl-lucene:getElementsByAttr($metadata, $docbase, "version")
289
(: Words to highlight in the page. For a non-empty highlight query the morphological forms and the
   regularizations of the query are fetched. $highlightQueryWords is whichever of the two is
   non-empty, both joined with '|' when both are non-empty, and the empty sequence when neither is. :)
290 let $highlightQueryWordsTemp :=
291 if ($highlightQuery != '')
292 then mpdltext:get-query-morph-forms($language, $highlightQuery)
293 else ''
294 let $highlightQueryRegularizations :=
295 if ($highlightQuery != '')
296 then mpdltext:get-query-regularizations($language, $highlightQuery)
297 else ''
298 let $highlightQueryWords :=
299 if ($highlightQueryWordsTemp != '' and $highlightQueryRegularizations = '')
300 then $highlightQueryWordsTemp
301 else if ($highlightQueryWordsTemp = '' and $highlightQueryRegularizations != '')
302 then $highlightQueryRegularizations
303 else if ($highlightQueryWordsTemp != '' and $highlightQueryRegularizations != '')
304 then concat($highlightQueryWordsTemp, '|', $highlightQueryRegularizations)
305 else ()
306
(: end of the time measurement; the difference to $currentTimeBegin is converted with mpdl-time:duration-as-ms :)
307 let $currentTimeEnd := util:system-time()
308 let $neededTime := mpdl-time:duration-as-ms($currentTimeEnd - $currentTimeBegin)
309
(: Assemble the XML result. For error codes below 10 this is a result element with the document
   description, the page data and the measured time. For the codes 10, 11 and 12 it is an error
   element with a specific description, and for any other code a generic error element.
   NOTE(review): xml-url is built from $documentName, whereas the "document" request parameter
   read at the top of the query is the full document URI - confirm that this is intended. :)
310 let $xmlResult :=
311 if ($errorCode < 10)
312 then
313 <result>
314 <document-description>
315 <uri>{$mpdlDocUri}</uri>
316 <collection-name>{$docbase}</collection-name>
317 <document-name>{$documentName}</document-name>
318 <language>{$language}</language>
319 <authors>{$authors}</authors>
320 <titles>{$titles}</titles>
321 <places>{$places}</places>
322 <date>{$date}</date>
323 <identifier>{$identifier}</identifier>
324 <rights>{$rights}</rights>
325 <accessRights>{$accessRights}</accessRights>
326 <licenses>{$licenses}</licenses>
327 <file>{$file}</file>
328 <translator>{$translator}</translator>
329 <version>{$version}</version>
330 <count-pages>{$countPages}</count-pages>
331 <count-places>{$countGisPlaces}</count-places>
332 <count-toc-entries>{$countTocEntries}</count-toc-entries>
333 <count-figure-entries>{$countFigureEntries}</count-figure-entries>
334 </document-description>
335 <page>
336 <mode>{$mode}</mode>
337 <number>{$pn}</number>
338 <header>{$pageHeader}</header>
339 <number-orig>{$pageNumberOrig}</number-orig>
340 <sentence-number>{$sn}</sentence-number>
341 <digilib-available>{$digilibAvailable}</digilib-available>
342 <image-available>{$imageIsAvailable}</image-available>
343 <image-file-name>{$imageFileName}</image-file-name>
344 {$imageEcho}
345 {$imageScaler}
346 <xml-url>?document={$documentName}&amp;pn={$pn}&amp;mode=xml</xml-url>
347 <page-image-directory>{$imagesDocDirectory}/{$pageImageDirectory}</page-image-directory>
348 <figures-image-directory>{$imagesDocDirectory}/{$figuresImageDirectory}</figures-image-directory>
349 <firstFigurePosition>{$positionOfFirstFigureAfterPB1}</firstFigurePosition>
350 <figures>{$pageFigures}</figures>
351 <handwritten>{$pageHandwritten}</handwritten>
352 <tables>{$pageTables}</tables>
353 <notes>{$pageNotes}</notes>
354 <highlights>
355 <query>{$highlightQuery}</query>
356 <words>{$highlightQueryWords}</words>
357 </highlights>
358 <content>{$returnPageFragment}</content>
359 <character-normalization>{$charNorm}</character-normalization>
360 <options>{$options}</options>
361 </page>
362 <performance>{$neededTime}</performance>
363 </result>
364 else if ($errorCode = 10)
365 then <error><number>{$errorCode}</number><description>Fulltext document: {$mpdlDocUri} is not available yet</description></error>
366 else if ($errorCode = 11)
367 then <error><number>{$errorCode}</number><description>No result: Page {$pn} not found</description></error>
368 else if ($errorCode = 12)
369 then <error><number>{$errorCode}</number><description>View mode {$mode} not available</description></error>
370 else <error><number>{$errorCode}</number><description>undefined error: {$errorCode}</description></error>
371
(: Serialization: errors and the modes text, textPollux, gis, image and xml are serialized as
   xhtml with media type text/html; pureXml and anything else as xml with media type text/xml.
   The last two branches declare the same option string. :)
372 let $declare :=
373 if ($errorCode > 9 or $mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml")
374 then util:declare-option("exist:serialize", "method=xhtml media-type=text/html omit-xml-declaration=no indent=yes encoding=utf-8")
375 else if ($mode = "pureXml")
376 then util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")
377 else util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")
(: stylesheet: pageFragmentHtml.xsl for the html producing modes, pageXml.xsl otherwise :)
378 let $xslFilePath :=
379 if($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml")
380 then concat($presentationPath, "/pageFragmentHtml.xsl")
381 else concat($presentationPath, "/pageXml.xsl")
382
(: Title line handed to the PDF export: authors, titles, places and date.
   $date is joined with string-join like the other fields, because concat() accepts at most one
   item per argument and would raise a type error if the metadata held more than one date.
   For no date or exactly one date the resulting string is the same as before. :)
383 let $titleStr := concat(string-join($authors, ', '), ". ", string-join($titles, ', '), ". ", string-join($places, ', '), " ", string-join($date, ', '), ".")
(: successful requests are rendered as PDF when export=pdf, otherwise transformed with the
   stylesheet; for error codes of 10 or more the error element is wrapped in a div :)
384 let $tmpResult :=
385 if ($errorCode < 10 and $reqExport = "pdf")
386 then mpdl-text:html2pdf($language, $xmlResult, $xslFilePath, $titleStr, $pn, $mode)
387 else if ($errorCode < 10 and not($reqExport = "pdf"))
388 then mpdl-text:transform($xmlResult, $xslFilePath)
389 else
390 <div>{$xmlResult}</div> (: error xml result :)
(: a PDF is streamed as binary under the file name documentName-pageN.pdf :)
391 let $result :=
392 if ($errorCode < 10 and $reqExport = "pdf")
393 then response:stream-binary($tmpResult, "application/pdf", concat($documentName, "-page", $pn, ".pdf"))
394 else $tmpResult
395
(: In pureXml mode a Content-Disposition header naming the file documentName-pageN is set.
   NOTE(review): the header value consists of the filename parameter only and names no
   disposition type such as inline or attachment - confirm that clients accept this. :)
396 let $setHeader :=
397 if ($mode = "pureXml")
398 then response:set-header('Content-Disposition', concat('filename=', $documentName, '-page', $pn))
399 else ()
400
401 return $result