comparison software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
comparison
equal deleted inserted replaced
6:2396a569e446 7:5589d865af7a
1 xquery version "1.0";
2
3 declare namespace request="http://exist-db.org/xquery/request";
4 declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util";
5 declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util";
6
(:~
 : Converts a duration to a whole number of milliseconds.
 :
 : All day, hour, minute and second components are taken into account
 : (the previous version ignored days and hours, so e.g. PT1H yielded 0).
 :
 : @param $t the duration to convert (e.g. the difference of two times)
 : @return the duration in milliseconds, rounded to the nearest integer value
 :)
declare function time-util:duration-as-ms($t) {
    round(
        (((days-from-duration($t) * 24
            + hours-from-duration($t)) * 60
            + minutes-from-duration($t)) * 60
            + seconds-from-duration($t)) * 1000
    )
};
10
(:~
 : Returns the distinct words of the given string.
 :
 : The splitting itself is delegated to mpdltext:getWords, which is not
 : defined in this file; it is called with the delimiter pattern below
 : (comma, semicolon, full stop, whitespace) and the flag "i".
 : NOTE(review): presumably behaves like fn:tokenize($strInput, $pattern, "i");
 : confirm against the mpdltext module.
 :
 : @param $strInput the text to split, may be empty
 : @return the words with duplicates removed
 :)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
    fn:distinct-values(
        mpdltext:getWords($strInput, "[,;.\s]+", "i")
    )
};
18
(:~
 : Separates the given elements with commas.
 :
 : Every element except the last one that has at least one text node child
 : is wrapped as <w>{element}, </w>; the last element and elements without
 : text children are returned unchanged.
 :
 : @param $elems the elements to separate
 : @return one output element per input element, in the same order
 :)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
    let $total := count($elems)
    for $item at $idx in $elems
    return
        if ($idx >= $total or empty($item/text()))
        then $item
        else <w>{$item}, </w>
};
28
(:~
 : Turns words into dictionary links, sorted by the lower-cased word.
 :
 : Each non-empty word becomes an <a> element whose text is the lower-cased
 : word and whose href points at the dictionary lookup URL built from the
 : word and the language. Empty words yield an empty <a/> placeholder.
 :
 : The word and the language are percent-encoded before being placed in the
 : URL, so that words containing characters such as space, "&" or "#" do not
 : corrupt the query string.
 :
 : @param $words    the words to link
 : @param $language language code passed to the dictionary; may be empty,
 :                  matching what string-util:toSortedLinkedWords accepts
 : @return one <a> element per input word, ordered by lower-cased word
 :)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
    let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
    let $langParam := fn:encode-for-uri(string($language))
    for $word in $words
    let $lowerCaseWord := fn:lower-case($word)
    order by $lowerCaseWord
    return
        if (string-length($lowerCaseWord) > 0)
        then <a href="{$dictURLPart1}{fn:encode-for-uri($lowerCaseWord)};lang={$langParam};pro=echo">{$lowerCaseWord}</a>
        else <a></a>
};
43
(:~
 : Builds a comma-separated, sorted list of dictionary links for the words
 : of a text.
 :
 : Pipeline: string-util:getWords splits the input into distinct words,
 : string-util:getSortedLinkedWords turns them into sorted <a> links, and
 : string-util:putCommaBetween inserts the separators.
 :
 : @param $strInput the text whose words are to be linked
 : @param $language language code handed to the link builder
 : @return the linked words, separated by commas
 :)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
    string-util:putCommaBetween(
        string-util:getSortedLinkedWords(
            string-util:getWords($strInput),
            $language
        )
    )
};
50
(:~
 : Returns a small hard-coded document node containing two <product>
 : elements (dept "ACC" / number 563 and dept "BCC" / number 564), each with
 : an English <name>. It is not called anywhere in the visible part of
 : this file.
 :
 : @return a document node with the two sample products as children
 :)
declare function string-util:getDummyDocument() as node() {
    document {
        <product dept="ACC"><number>563</number><name language="en">Floppy Sun Hat</name></product>,
        <product dept="BCC"><number>564</number><name language="en">Floppy SBun Iat</name></product>
    }
};
67
(:~
 : Main query body: runs a Lucene full-text query against the <s> elements
 : of ONE document and renders a paged HTML hit list.
 :
 : Request parameters read here:
 :   lang          "0" (default) selects /db/archimedes and the "ft-query"
 :                 parameter; any other value selects /db/arch/{language}
 :                 and the "ft-lang-query" parameter
 :   language      sub-collection used when lang != "0" (default "fr")
 :   document      document name without ".xml"
 :   ft-query /
 :   ft-lang-query the Lucene query string
 :   pn            1-based result page number (default 1)
 :
 : Fixes relative to the previous revision:
 :   - page count no longer produces an empty trailing page when the number
 :     of hits is a multiple of the page size
 :   - "pn" is validated and clamped to the valid page range
 :   - the page break preceding a hit is located document-wide; the old
 :     expression //pb[$n] selected pb elements by their position among
 :     their siblings, not by their position in the document
 :   - paging links keep the "document" parameter and URL-encode all values
 :)
let $pageSize := 10
let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "alber_archi_003_en_1755")

let $tempArchimedesDocPath :=
    if ($lang = "0")
    then "/db/archimedes"
    else concat("/db/arch/", $language)
let $archimedesDocPath := concat($tempArchimedesDocPath, "/", $document, ".xml")
let $archDoc := doc($archimedesDocPath)

let $lucene-query :=
    if ($lang = "0")
    then request:get-parameter("ft-query", "Illuſtriſsimi")
    else request:get-parameter("ft-lang-query", "Illuſtriſsimi")

(: one <elem> per matching sentence, carrying everything the page needs :)
let $resultElems :=
    for $s at $pos in $archDoc//s[ft:query(., $lucene-query)]
    let $documentName := util:document-name($s)
    let $collectionName := util:collection-name($s)
    let $fullDocName := concat($collectionName, "/", $documentName)
    let $docRoot := doc($fullDocName)
    let $sArchInfo := $s/root()/archimedes/info
    let $author := string($sArchInfo/author/text())
    (: language of the hit's own document, distinct from the request parameter :)
    let $sLanguage := string($sArchInfo/lang/text())
    let $dictLinks := string-util:toSortedLinkedWords($s, $sLanguage)
    (: only pb elements of this document are compared, which keeps this fast :)
    let $pbsBeforeS := $docRoot//pb[. << $s]
    let $pnOfS := count($pbsBeforeS)
    (: path results are in document order, so the last one is the nearest page break :)
    let $currentPb := $pbsBeforeS[last()]
    let $posOfS :=
        if (exists($currentPb))
        then count($currentPb/following::s[. << $s]) + 1
        (: no page break before the hit: count sentences from the document start :)
        else count($docRoot//s[. << $s]) + 1
    let $resultElem :=
        <elem>
            <pos>{$pos}</pos>
            <full-doc>{$fullDocName}</full-doc>
            <name>{$documentName}</name>
            <pn>{$pnOfS}</pn>
            <pos-of-s>{$posOfS}</pos-of-s>
            {$s}
            <links>{$dictLinks}</links>
        </elem>
    order by $author, $pos
    return $resultElem

let $countElems := count($resultElems)
(: ceiling division; always at least one page so the page list is never empty :)
let $countPages := max((1, ($countElems + $pageSize - 1) idiv $pageSize))

(: fall back to page 1 on a non-integer "pn" and clamp into [1, $countPages] :)
let $pnParam := request:get-parameter("pn", "1")[1]
let $requestedPn :=
    if ($pnParam castable as xs:integer)
    then xs:integer($pnParam)
    else 1
let $pn := max((1, min(($requestedPn, $countPages))))
let $positionFrom :=
    if ($countElems = 0)
    then 0
    else ($pn - 1) * $pageSize + 1
let $positionTo := min(($pn * $pageSize, $countElems))

(: base query string of the paging links; must reproduce the current request :)
let $pagesURLs :=
    if ($lang = "0")
    then concat("?ft-query=", fn:encode-for-uri($lucene-query),
                "&amp;document=", fn:encode-for-uri($document))
    else concat("?ft-lang-query=", fn:encode-for-uri($lucene-query),
                "&amp;lang=1&amp;language=", fn:encode-for-uri($language),
                "&amp;document=", fn:encode-for-uri($document))

(: the current page is shown as plain text, all others as links :)
let $countPagesURLs :=
    for $i in (1 to $countPages)
    return
        if ($i = $pn)
        then ($i, " ")
        else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")

let $pageResult :=
    for $elem at $pos in $resultElems
    let $hitPos := xs:integer($elem/pos)
    let $pnOfS := xs:integer($elem/pn)
    let $posOfS := xs:integer($elem/pos-of-s)
    let $docName := $elem/name
    let $linkPageQuery := (<a href="page-query-result.xql?document={$docName}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
    let $hitLI :=
        <li value="{$hitPos}">
            {$linkPageQuery}, Sentence: {$posOfS}<br></br>
            <b>Sentence: </b>{$elem/s/text()}<br></br>
            <b>Dictionary links: </b>{$elem/links}<br></br>
        </li>
    where $pos >= $positionFrom and $pos <= $positionTo
    return $hitLI

let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)


return
<html>
    <head>
        <title>Result for your query: "{$lucene-query}"</title>
    </head>
    <body>
        <h2>Result of query: "{$lucene-query}"</h2>
        {$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
        <ol>
            {$pageResult}
        </ol>
        <hr></hr>
        <p></p>
        Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
    </body>
</html>