Mercurial > hg > mpdl-group
comparison software/eXist/webapp/mpdl/interface/page-fragment.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children | d2a1c14fde31 |
comparison
equal
deleted
inserted
replaced
6:2396a569e446 | 7:5589d865af7a |
---|---|
1 xquery version "1.0"; | |
2 | |
3 import module namespace mpdl-time = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/util/time" at "../util/time.xql"; | |
4 import module namespace functx = "http://www.functx.com" at "../util/functx.xql"; | |
5 import module namespace mpdl-lucene = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/lucene/search" at "../lucene/search.xql"; | |
6 import module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text" at "../text/all.xql"; | |
7 | |
8 declare namespace xlink="http://www.w3.org/1999/xlink"; | |
9 declare namespace request = "http://exist-db.org/xquery/request"; | |
10 declare namespace transform = "http://exist-db.org/xquery/transform"; | |
11 declare namespace util = "http://exist-db.org/xquery/util"; | |
12 | |
13 declare namespace dcterms="http://purl.org/dc/terms"; | |
14 declare namespace xhtml="http://www.w3.org/1999/xhtml"; | |
15 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/"; | |
16 (: Request parameters. The second argument of request:get-parameter is the default used when the parameter is missing. :) | |
17 let $mpdlDocUri := request:get-parameter("document", "") | |
18 let $mode := request:get-parameter("mode", "image") | |
19 | |
20 let $reqPN := number(request:get-parameter("pn", "-1")) (: numeric page number, -1 when "pn" is not supplied :) | |
21 let $reqPF := request:get-parameter("pf", "") | |
22 let $reqSN := number(request:get-parameter("sn", "-1")) (: numeric "sn" value, -1 when not supplied :) | |
23 let $highlightQuery := request:get-parameter("highlightQuery", "") | |
24 let $regCharNorm := request:get-parameter("characterNormalization", "") | |
25 let $tmpCharNorm := string-join($regCharNorm, ',') (: several parameter values become one comma-separated string :) | |
26 let $charNorm := | |
27 if($tmpCharNorm = "regPlusNorm") (: "regPlusNorm" is mapped to "reg,norm" :) | |
28 then "reg,norm" | |
29 else $tmpCharNorm | |
30 let $reqExport := request:get-parameter("export", "") | |
31 let $options := string(request:get-parameter("options", "")) | |
32 (: Derive document name, language and docbase from the requested URI, then load the document and pick its metadata element. :) | |
33 let $presentationPath := "/db/mpdl/presentation" | |
34 (: e.g. for mpdlDocUri "/archimedes/la/yourDoc.xml": docbase is "archimedes", language is "la", documentName is "yourDoc" :) | |
35 let $documentName := substring-before(substring-after(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/"), ".") | |
36 let $language := substring-before(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/") | |
37 let $docbase := substring-before(substring-after($mpdlDocUri, "/"), "/") | |
38 let $fullDocumentUri := concat('/db/mpdl/documents/morph', $mpdlDocUri) | |
39 let $currentTimeBegin := util:system-time() (: start time; the elapsed time is reported in the performance element of the result :) | |
40 let $documentAvailable := doc-available($fullDocumentUri) | |
41 let $document := doc($fullDocumentUri) | |
42 let $metadata := (: metadata element of the document; the empty string for any docbase other than archimedes or echo :) | |
43 if ($docbase = 'archimedes') | |
44 then $document/archimedes/info | |
45 else if ($docbase = 'echo') | |
46 then $document/echo:echo/echo:metadata | |
47 else '' | |
48 (: Collect the page breaks of the document; echo documents use echo:pb, every other docbase uses pb. :) | |
49 let $pageBreaks := | |
50 if ($docbase = 'archimedes') | |
51 then $document//pb | |
52 else if ($docbase = 'echo') | |
53 then $document//echo:pb | |
54 else $document//pb | |
55 let $countPagesTemp := count($pageBreaks) | |
56 let $countPages := (: reported page count: at least 1, even when no page break was found :) | |
57 if ($countPagesTemp > 0) | |
58 then $countPagesTemp | |
59 else 1 | |
60 | |
61 (: for performance reasons: deliver count of gis places and toc/figure entries :) | |
62 let $gisPlaces := (: only echo documents are searched for places :) | |
63 if ($docbase = 'echo') | |
64 then $document//echo:place | |
65 else () | |
66 let $countGisPlaces := count($gisPlaces) | |
67 let $tocEntries := (: toc entries are echo:div elements of type section or chapter :) | |
68 if ($docbase = 'echo') | |
69 then $document//echo:div[@type = 'section' or @type = 'chapter'] | |
70 else () | |
71 let $figureEntries := | |
72 if ($docbase = 'echo') | |
73 then $document//echo:figure | |
74 else if ($docbase = 'archimedes') | |
75 then $document//figure | |
76 else () | |
77 let $countTocEntries := count($tocEntries) | |
78 let $countFigureEntries := count($figureEntries) | |
79 (: Resolve the requested page number and validate the request. :) | |
80 (: jump to first pn and sn hit in fulltext mode :) | |
81 let $pn := | |
82 if ($reqPN = -1) | |
83 then 1 | |
84 else $reqPN | |
85 let $sn := $reqSN | |
86 | |
87 (: 10 or more is an error: 10 = document not available, 11 = page out of range, 12 = unknown mode :) | |
88 let $errorCode := | |
89 if (not($documentAvailable)) | |
90 then 10 | |
91 else if ($countPagesTemp != 0 and not($pn > 0 and $pn <= $countPagesTemp)) (: not(...) also rejects a non-numeric pn: number() yields NaN and every comparison with NaN is false :) | |
92 then 11 | |
93 else if (not($mode = ("text", "textPollux", "gis", "image", "xml", "pureXml"))) (: checked before the single-page case so that an unknown mode is always reported :) | |
94 then 12 | |
95 else if ($countPagesTemp = 0) | |
96 then 1 (: if no page break is found then the document should have exactly one page :) | |
97 else 0 | |
98 (: $pb1 and $pb2 delimit the requested page: the pn-th page break and its successor, or, for error code 1 (no page break), the first and second item returned by mpdl-lucene:getText. :) | |
99 let $pb1 := | |
100 if ($errorCode = 0) | |
101 then subsequence($pageBreaks, $pn, 1) | |
102 else if ($errorCode = 1) | |
103 then subsequence(mpdl-lucene:getText($docbase, $document), 1, 1) | |
104 else () | |
105 let $pb2 := (: empty when the requested page is the last one, because there is no following page break :) | |
106 if ($errorCode = 0) | |
107 then subsequence($pageBreaks, $pn + 1, 1) | |
108 else if ($errorCode = 1) | |
109 then subsequence(mpdl-lucene:getText($docbase, $document), 2, 1) | |
110 else () | |
111 let $pageHeader := string($pb1/@rhead) | |
112 let $pageNumberOrig := string($pb1/@o) | |
113 (: Work out where the page images of the document live and build the image file name for the requested page. :) | |
114 let $documentIdentifier := | |
115 if ($docbase = 'archimedes') | |
116 then $metadata/locator | |
117 else if ($docbase = 'echo') | |
118 then $metadata/dcterms:identifier | |
119 else $metadata/dcterms:identifier | |
120 let $echoDocIdentifier := (: the part of the identifier between "ECHO:" and the first following "." :) | |
121 if ($documentIdentifier != '') | |
122 then substring-before(substring-after($documentIdentifier, "ECHO:"), ".") | |
123 else '' | |
124 let $echoURLZogilib := "http://echo.mpiwg-berlin.mpg.de/zogilib" | |
125 let $nausikaaURLScaler := "http://nausikaa2.rz-berlin.mpg.de/digitallibrary/servlet/Scaler" (: NOTE(review): host differs from the two nausikaa2 URLs below, confirm this is intended :) | |
126 let $nausikaaURLDlInfo := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dlInfo-xml.jsp" | |
127 let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter" | |
128 let $echoImageDir := (: image directory stated in the document metadata, if any :) | |
129 if ($docbase = 'archimedes') | |
130 then string($metadata/echodir) | |
131 else if ($docbase = 'echo') | |
132 then string($metadata/echo:echodir) | |
133 else '' | |
134 let $imagesDocDirectory := (: the metadata value wins; otherwise a default path is built per docbase :) | |
135 if ($echoImageDir != '') | |
136 then $echoImageDir | |
137 else if ($docbase = 'archimedes') | |
138 then concat("/permanent/archimedes/", $documentName) | |
139 else if ($docbase = 'echo') | |
140 then concat("/permanent/library/", $echoDocIdentifier) | |
141 else '' | |
142 let $imagesDocDirectoryIndexMetaUrl := (: URL of the index.meta file of the image directory; empty in xml and pureXml mode :) | |
143 if ($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") | |
144 then concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta") | |
145 else () | |
146 let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, 2000) (: NOTE(review): the mpdldoc prefix is not declared in the visible part of this file, presumably a module registered with the server; the call is also made when the URL is the empty sequence; 2000 looks like a timeout, confirm :) | |
147 let $imagesDocDirectoryIndexMeta := (: index.meta is only fetched when the availability check succeeded :) | |
148 if (($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") and $digilibAvailable) | |
149 then doc($imagesDocDirectoryIndexMetaUrl) | |
150 else () | |
151 let $pageImageDirectory := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/image) | |
152 let $figuresImageDirectoryTemp := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/figures) | |
153 let $figuresImageDirectory := (: fallback: the page image directory up to "pageimg", followed by "figures" :) | |
154 if ($figuresImageDirectoryTemp != '') | |
155 then $figuresImageDirectoryTemp | |
156 else concat(substring-before($pageImageDirectory, "pageimg"), "figures") | |
157 let $pageImageFileNameWithoutExtension := (: only echo documents take the file name from the file attribute of the page break :) | |
158 if ($docbase = 'echo') | |
159 then concat("/", string($pb1/@file)) | |
160 else '' | |
161 let $imageFileName := (: an explicit "pf" request parameter overrides the computed file name :) | |
162 if ($reqPF = '') | |
163 then concat($imagesDocDirectory, "/", $pageImageDirectory, $pageImageFileNameWithoutExtension) | |
164 else $reqPF | |
165 let $imageEcho := <image-echo>{$echoURLZogilib}?fn={$imageFileName}&amp;pn={$pn}</image-echo> (: viewer URL for the page; a literal ampersand in element content has to be written as the entity reference &amp; :) | |
166 let $imageScaler := <image-scaler>{$nausikaaURLScaler}?fn={$imageFileName}&amp;pn={$pn}</image-scaler> (: scaler URL for the same image file and page number :) | |
167 (: Ask the dlInfo service whether the page image exists; the lookup only runs in image mode and when the availability check succeeded. :) | |
168 let $imageFileNameUrl := concat($nausikaaURLDlInfo, "?fn=", $imageFileName) | |
169 let $testImageResult := | |
170 if ($mode = 'image' and $digilibAvailable) | |
171 then doc($imageFileNameUrl) | |
172 else () | |
173 let $testImageResultParamImgFn := string($testImageResult//parameter[@name='img.fn']/@value) | |
174 let $imageIsAvailable := (: 'false' only when the lookup produced no img.fn value and no "pf" parameter was given; note these are strings, not booleans :) | |
175 if ($testImageResultParamImgFn = '' and $reqPF = '') | |
176 then 'false' | |
177 else 'true' | |
178 | |
179 let $positionOfFirstFigureAfterPB1 := (: number of figures preceding the first figure after $pb1, plus one; empty for other docbases :) | |
180 if ($docbase = 'archimedes') | |
181 then count($pb1/following::figure[1]/preceding::figure) + 1 | |
182 else if ($docbase = 'echo') | |
183 then count($pb1/following::echo:figure[1]/preceding::echo:figure) + 1 | |
184 else () | |
185 (: Extract the raw page content between the two page boundaries; nothing is extracted in image mode or for error codes of 10 and above. :) | |
186 let $pageFragmentTmp := | |
187 if ($mode = "image" or $errorCode > 9) | |
188 then () | |
189 else if ($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") | |
190 then util:get-fragment-between($pb1, $pb2, true()) | |
191 else () | |
192 (: replace the soft hyphen (Unicode character for 00AD) just before the line break by a normal hyphen :) | |
193 (: delete the hyphen just before the line break in case of options=withoutLBs :) | |
194 let $pageFragment := (: the soft hyphen is written as the character reference &#xAD; because the raw character is invisible and easily lost when the file is edited or copied :) | |
195 if (($mode = "text" or $mode = "textPollux") and not(contains($options, "withoutLBs")) and contains($pageFragmentTmp, "&#xAD;<lb")) | |
196 then replace($pageFragmentTmp, "&#xAD;<lb", "-<lb") | |
197 else if (($mode = "text" or $mode = "textPollux") and contains($options, "withoutLBs") and contains($pageFragmentTmp, "-<lb")) | |
198 then replace($pageFragmentTmp, "-<lb", "<lb") | |
199 else $pageFragmentTmp | |
200 let $pageFragmentNormalized := (: character normalization: text modes default to 'reg,norm' when none is requested, xml modes are left untouched unless one is requested; NOTE(review): the mpdltext prefix is not declared in the visible part of this file, only mpdl-text is imported :) | |
201 if ($mode = "image" or $errorCode > 9) | |
202 then () | |
203 else if (($mode = "text" or $mode = "textPollux" or $mode = "gis") and $charNorm = "") | |
204 then mpdltext:normalizeChars('reg,norm', $language, $pageFragment) | |
205 else if (($mode = "xml" or $mode = "pureXml") and $charNorm = "") | |
206 then $pageFragment | |
207 else if (($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") and $charNorm != "") | |
208 then mpdltext:normalizeChars($charNorm, $language, $pageFragment) | |
209 else () | |
210 let $retPageFragment := (: only textPollux mode passes the normalized fragment through mpdltext:dictionarize :) | |
211 if ($mode = "image" or $errorCode > 9) | |
212 then () | |
213 else if ($mode = "text" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") | |
214 then $pageFragmentNormalized | |
215 else if ($mode = "textPollux") | |
216 then mpdltext:dictionarize($pageFragmentNormalized, $language) | |
217 else () | |
218 let $returnPageFragmentTmp := util:parse($retPageFragment) (: returns a valid xml document for that string :) | |
219 (: Read the external elements stored for this document and page and, if there are any, insert them into the parsed page fragment. :) | |
220 let $externalElementsTmpTmp := mpdltext:externalObject("read", "element", "", $mpdlDocUri, string($pn), "", "", "") | |
221 let $externalElementsTmp := (: the helper result is parsed unless it is the empty string :) | |
222 if(not($externalElementsTmpTmp = "")) | |
223 then util:parse($externalElementsTmpTmp) | |
224 else () | |
225 let $externalElements := $externalElementsTmp/result/element | |
226 let $containsExternalElements := | |
227 if(not(empty($externalElements))) | |
228 then true() | |
229 else false() | |
230 let $returnPageFragmentTmpp := (: node ids are added on request via options=withXmlNodeId and whenever external elements exist :) | |
231 if (contains($options, "withXmlNodeId") or $containsExternalElements) | |
232 then mpdl-text:insertNodeIdAttribute($returnPageFragmentTmp/*[1]) | |
233 else $returnPageFragmentTmp | |
234 | |
235 let $returnPageFragment := | |
236 if($containsExternalElements) | |
237 then mpdl-text:insert($returnPageFragmentTmpp/*[1], $externalElements) | |
238 else $returnPageFragmentTmpp | |
239 (: For every anchor in the page fragment, look up the first element of the whole document whose xlink:label equals the xlink:href of the anchor. :) | |
240 let $pageFigureAnchors := $returnPageFragment//anchor[@type = 'figure'] | |
241 let $pageFigures := | |
242 for $pageFigureAnchor in $pageFigureAnchors | |
243 let $href := string($pageFigureAnchor/@xlink:href) | |
244 let $pageFigureTmp := $document//echo:figure[@xlink:label = $href] | |
245 let $pageFigure := subsequence($pageFigureTmp, 1, 1) | |
246 return | |
247 $pageFigure | |
248 let $pageHandwrittenAnchors := $returnPageFragment//anchor[@type = 'handwritten'] | |
249 let $pageHandwritten := | |
250 for $pageHandwrittenAnchor in $pageHandwrittenAnchors | |
251 let $handwrittenHref := string($pageHandwrittenAnchor/@xlink:href) | |
252 let $pageHandwrittenTmp := $document//echo:handwritten[@xlink:label = $handwrittenHref] | |
253 let $pageHandwritten := subsequence($pageHandwrittenTmp, 1, 1) | |
254 return | |
255 $pageHandwritten | |
256 let $pageTableAnchors := $returnPageFragment//anchor[@type = 'table'] | |
257 let $pageTables := | |
258 for $pageTableAnchor in $pageTableAnchors | |
259 let $tableHref := string($pageTableAnchor/@xlink:href) | |
260 let $pageTableTmp := $document//xhtml:table[@xlink:label = $tableHref] | |
261 let $pageTable := subsequence($pageTableTmp, 1, 1) | |
262 return | |
263 $pageTable | |
264 let $pageNoteAnchors := $returnPageFragment//anchor[@type = 'note'] | |
265 let $pageNotes := (: echo documents resolve notes through their anchors; every other docbase takes the note elements of the page fragment itself :) | |
266 if ($docbase = "echo") | |
267 then | |
268 for $pageNoteAnchor in $pageNoteAnchors | |
269 let $noteHref := string($pageNoteAnchor/@xlink:href) | |
270 let $pageNoteTmp := $document//echo:note[@xlink:label = $noteHref] | |
271 let $pageNote := subsequence($pageNoteTmp, 1, 1) | |
272 return | |
273 $pageNote | |
274 else | |
275 $returnPageFragment//note | |
276 (: Each field below is fetched from $metadata by name through mpdl-lucene:getElementsByAttr; the values end up in the document-description part of the result. :) | |
277 (: Metadata handling: only metadata of the selected document is scanned :) | |
278 let $identifier := $documentIdentifier | |
279 let $authors := mpdl-lucene:getElementsByAttr($metadata, $docbase, "author") | |
280 let $titles := mpdl-lucene:getElementsByAttr($metadata, $docbase, "title") | |
281 let $places := mpdl-lucene:getElementsByAttr($metadata, $docbase, "place") | |
282 let $date := mpdl-lucene:getElementsByAttr($metadata, $docbase, "date") | |
283 let $rights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "rights") | |
284 let $accessRights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "accessRights") | |
285 let $licenses := mpdl-lucene:getElementsByAttr($metadata, $docbase, "license") | |
286 let $file := mpdl-lucene:getElementsByAttr($metadata, $docbase, "file") | |
287 let $translator := mpdl-lucene:getElementsByAttr($metadata, $docbase, "translator") | |
288 let $version := mpdl-lucene:getElementsByAttr($metadata, $docbase, "version") | |
289 (: Build the list of words to highlight from the morphological forms and the regularizations of the highlight query. :) | |
290 let $highlightQueryWordsTemp := | |
291 if ($highlightQuery != '') | |
292 then mpdltext:get-query-morph-forms($language, $highlightQuery) | |
293 else '' | |
294 let $highlightQueryRegularizations := | |
295 if ($highlightQuery != '') | |
296 then mpdltext:get-query-regularizations($language, $highlightQuery) | |
297 else '' | |
298 let $highlightQueryWords := (: whichever of the two lists is non-empty; both joined with a vertical bar when both are; the empty sequence when neither is :) | |
299 if ($highlightQueryWordsTemp != '' and $highlightQueryRegularizations = '') | |
300 then $highlightQueryWordsTemp | |
301 else if ($highlightQueryWordsTemp = '' and $highlightQueryRegularizations != '') | |
302 then $highlightQueryRegularizations | |
303 else if ($highlightQueryWordsTemp != '' and $highlightQueryRegularizations != '') | |
304 then concat($highlightQueryWordsTemp, '|', $highlightQueryRegularizations) | |
305 else () | |
306 | |
307 let $currentTimeEnd := util:system-time() | |
308 let $neededTime := mpdl-time:duration-as-ms($currentTimeEnd - $currentTimeBegin) (: time elapsed since $currentTimeBegin was taken :) | |
309 (: Assemble the XML result: the result element for error codes below 10, otherwise an error element with the code and a description. Ampersands in element content are written as entity references, a bare ampersand is not allowed there. :) | |
310 let $xmlResult := | |
311 if ($errorCode < 10) | |
312 then | |
313 <result> | |
314 <document-description> | |
315 <uri>{$mpdlDocUri}</uri> | |
316 <collection-name>{$docbase}</collection-name> | |
317 <document-name>{$documentName}</document-name> | |
318 <language>{$language}</language> | |
319 <authors>{$authors}</authors> | |
320 <titles>{$titles}</titles> | |
321 <places>{$places}</places> | |
322 <date>{$date}</date> | |
323 <identifier>{$identifier}</identifier> | |
324 <rights>{$rights}</rights> | |
325 <accessRights>{$accessRights}</accessRights> | |
326 <licenses>{$licenses}</licenses> | |
327 <file>{$file}</file> | |
328 <translator>{$translator}</translator> | |
329 <version>{$version}</version> | |
330 <count-pages>{$countPages}</count-pages> | |
331 <count-places>{$countGisPlaces}</count-places> | |
332 <count-toc-entries>{$countTocEntries}</count-toc-entries> | |
333 <count-figure-entries>{$countFigureEntries}</count-figure-entries> | |
334 </document-description> | |
335 <page> | |
336 <mode>{$mode}</mode> | |
337 <number>{$pn}</number> | |
338 <header>{$pageHeader}</header> | |
339 <number-orig>{$pageNumberOrig}</number-orig> | |
340 <sentence-number>{$sn}</sentence-number> | |
341 <digilib-available>{$digilibAvailable}</digilib-available> | |
342 <image-available>{$imageIsAvailable}</image-available> | |
343 <image-file-name>{$imageFileName}</image-file-name> | |
344 {$imageEcho} | |
345 {$imageScaler} | |
346 <xml-url>?document={$documentName}&amp;pn={$pn}&amp;mode=xml</xml-url> | |
347 <page-image-directory>{$imagesDocDirectory}/{$pageImageDirectory}</page-image-directory> | |
348 <figures-image-directory>{$imagesDocDirectory}/{$figuresImageDirectory}</figures-image-directory> | |
349 <firstFigurePosition>{$positionOfFirstFigureAfterPB1}</firstFigurePosition> | |
350 <figures>{$pageFigures}</figures> | |
351 <handwritten>{$pageHandwritten}</handwritten> | |
352 <tables>{$pageTables}</tables> | |
353 <notes>{$pageNotes}</notes> | |
354 <highlights> | |
355 <query>{$highlightQuery}</query> | |
356 <words>{$highlightQueryWords}</words> | |
357 </highlights> | |
358 <content>{$returnPageFragment}</content> | |
359 <character-normalization>{$charNorm}</character-normalization> | |
360 <options>{$options}</options> | |
361 </page> | |
362 <performance>{$neededTime}</performance> | |
363 </result> | |
364 else if ($errorCode = 10) | |
365 then <error><number>{$errorCode}</number><description>Fulltext document: {$mpdlDocUri} is not available yet</description></error> | |
366 else if ($errorCode = 11) | |
367 then <error><number>{$errorCode}</number><description>No result: Page {$pn} not found</description></error> | |
368 else if ($errorCode = 12) | |
369 then <error><number>{$errorCode}</number><description>View mode {$mode} not available</description></error> | |
370 else <error><number>{$errorCode}</number><description>undefined error: {$errorCode}</description></error> | |
371 (: Choose the serialization options and the stylesheet: xhtml with pageFragmentHtml.xsl for text, textPollux, gis, image and xml mode, xml with pageXml.xsl otherwise. :) | |
372 let $declare := (: errors are always serialized as xhtml; NOTE(review): the last two branches set identical options, so the pureXml test is redundant :) | |
373 if ($errorCode > 9 or $mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml") | |
374 then util:declare-option("exist:serialize", "method=xhtml media-type=text/html omit-xml-declaration=no indent=yes encoding=utf-8") | |
375 else if ($mode = "pureXml") | |
376 then util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8") | |
377 else util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8") | |
378 let $xslFilePath := | |
379 if($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml") | |
380 then concat($presentationPath, "/pageFragmentHtml.xsl") | |
381 else concat($presentationPath, "/pageXml.xsl") | |
382 (: Produce the response: a PDF when export=pdf was requested, otherwise the XSL-transformed result; errors are returned as the error element wrapped in a div. :) | |
383 let $titleStr := concat(string-join($authors, ', '), ". ", string-join($titles, ', '), ". ", string-join($places, ', '), " ", $date, ".") | |
384 let $tmpResult := | |
385 if ($errorCode < 10 and $reqExport = "pdf") | |
386 then mpdl-text:html2pdf($language, $xmlResult, $xslFilePath, $titleStr, $pn, $mode) | |
387 else if ($errorCode < 10 and not($reqExport = "pdf")) | |
388 then mpdl-text:transform($xmlResult, $xslFilePath) | |
389 else | |
390 <div>{$xmlResult}</div> (: error xml result :) | |
391 let $result := (: the PDF is streamed as application/pdf under the name documentName-pageN.pdf; NOTE(review): the response prefix is not declared in the visible part of this file :) | |
392 if ($errorCode < 10 and $reqExport = "pdf") | |
393 then response:stream-binary($tmpResult, "application/pdf", concat($documentName, "-page", $pn, ".pdf")) | |
394 else $tmpResult | |
395 | |
396 let $setHeader := (: the bound value is never used below, presumably evaluated only for its effect on the response :) | |
397 if ($mode = "pureXml") | |
398 then response:set-header('Content-Disposition', concat('filename=', $documentName, '-page', $pn)) | |
399 else () | |
400 | |
401 return $result | |