Mercurial > hg > mpdl-group
comparison software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:2396a569e446 | 7:5589d865af7a |
---|---|
1 xquery version "1.0"; | |
2 | |
3 declare namespace request="http://exist-db.org/xquery/request"; | |
4 declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util"; | |
5 declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util"; | |
6 | |
(:~
 : Converts a duration to a whole number of milliseconds.
 :
 : The day, hour, minute and second components are all taken into account;
 : year and month components (if any) are ignored. The previous implementation
 : only looked at minutes and seconds, so anything lasting an hour or longer
 : was under-reported.
 :
 : @param $t the duration to convert (e.g. the difference of two dateTimes)
 : @return the rounded number of milliseconds, or the empty sequence if $t is empty
 :)
declare function time-util:duration-as-ms($t) {
  let $totalSeconds :=
    ((days-from-duration($t) * 24 + hours-from-duration($t)) * 60
      + minutes-from-duration($t)) * 60
      + seconds-from-duration($t)
  return round($totalSeconds * 1000)
};
10 | |
(:~
 : Splits a string into words and removes duplicates.
 :
 : The splitting itself is delegated to mpdltext:getWords, called with the
 : delimiter pattern "[,;.\s]+" (commas, semicolons, full stops, whitespace)
 : and the flag "i".
 : NOTE(review): the "mpdltext" prefix is not declared in the visible prolog;
 : confirm that it is bound by the environment or add the module import.
 :
 : @param $strInput the text to split; may be empty
 : @return the distinct words found in $strInput
 :)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
  (: pure XQuery alternative: fn:tokenize($strInput, "[,;.\s]+", "i") :)
  fn:distinct-values(mpdltext:getWords($strInput, "[,;.\s]+", "i"))
};
18 | |
(:~
 : Separates a sequence of elements with commas.
 :
 : Every element except the last one is wrapped in a <w> element together
 : with a trailing ", ", provided it has at least one text node child.
 : Elements without text children, and the last element, are returned as is.
 :
 : @param $elems the elements to separate
 : @return the elements in their original order, wrapped where a comma is needed
 :)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
  let $last := count($elems)
  for $item at $idx in $elems
  return
    if ($idx < $last and exists($item/text()))
    then <w>{$item}, </w>
    else $item
};
28 | |
(:~
 : Turns words into dictionary links, sorted alphabetically.
 :
 : Each word is lower-cased and rendered as an <a> element that points to the
 : Archimedes dictionary CGI. Zero-length words produce an empty <a> element.
 : The word and the language are percent-encoded before being placed in the
 : URL, so that characters such as "#", "&", "%" or non-ASCII letters cannot
 : break the link.
 :
 : @param $words    the words to link
 : @param $language the language code passed to the dictionary; may be empty
 :                  (the caller toSortedLinkedWords declares it as optional)
 : @return one <a> element per input word, ordered by the lower-cased word
 :)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
  let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
  for $word in $words
  let $lowerCaseWord := fn:lower-case($word)
  order by $lowerCaseWord
  return
    if (string-length($lowerCaseWord) > 0)
    then <a href="{$dictURLPart1}{fn:encode-for-uri($lowerCaseWord)};lang={fn:encode-for-uri($language)};pro=echo">{$lowerCaseWord}</a>
    else <a></a>
};
43 | |
(:~
 : Builds a comma-separated, alphabetically sorted list of dictionary links
 : for the words of a text.
 :
 : This is a pipeline of three sibling functions: getWords splits the text,
 : getSortedLinkedWords turns each word into a link, and putCommaBetween
 : inserts the separators.
 :
 : @param $strInput the text whose words are to be linked; may be empty
 : @param $language the language code handed on to getSortedLinkedWords
 : @return the linked words, separated by commas
 :)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
  string-util:putCommaBetween(
    string-util:getSortedLinkedWords(string-util:getWords($strInput), $language)
  )
};
50 | |
(:~
 : Returns a small hard-coded sample document.
 :
 : The document node has two <product> children, each with a dept attribute,
 : a <number> and a <name language="en">. Nothing in the visible script calls
 : this function.
 :
 : @return a document node holding the two sample products
 :)
declare function string-util:getDummyDocument() as node() {
  document {
    <product dept="ACC"><number>{563}</number><name language="en">Floppy Sun Hat</name></product>,
    <product dept="BCC"><number>{564}</number><name language="en">Floppy SBun Iat</name></product>
  }
};
67 | |
(: Main query: runs a Lucene full-text search over the <s> elements of one
   document and renders the hits as a paged HTML list, ten hits per page.

   Request parameters read here:
     lang          "0" (default) searches /db/archimedes with "ft-query";
                   any other value searches /db/arch/<language> with "ft-lang-query"
     language      language sub-collection, default "fr"
     document      document name without ".xml"
     ft-query / ft-lang-query   the Lucene query string
     pn            1-based result page number, default 1 :)
let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "alber_archi_003_en_1755")

let $tempArchimedesDocPath :=
  if ($lang = "0")
  then "/db/archimedes"
  else concat("/db/arch/", $language)
let $archimedesDocPath := concat($tempArchimedesDocPath, "/", $document, ".xml")
let $archDoc := doc($archimedesDocPath)

let $lucene-query :=
  if ($lang = "0")
  then request:get-parameter("ft-query", "Illuſtriſsimi")
  else request:get-parameter("ft-lang-query", "Illuſtriſsimi")

let $resultElems :=
  for $s at $pos in $archDoc//s[ft:query(., $lucene-query)]
  let $documentName := util:document-name($s)
  let $collectionName := util:collection-name($s)
  let $fullDocName := concat($collectionName, "/", $documentName)
  let $docRoot := doc($fullDocName)
  let $sArchInfo := $s/root()/archimedes/info
  let $author := string($sArchInfo/author/text())
  (: language recorded in the document itself; named differently from the
     request parameter $language to avoid shadowing it :)
  let $sentenceLanguage := string($sArchInfo/lang/text())
  let $dictLinks := string-util:toSortedLinkedWords($s, $sentenceLanguage)
  (: all page breaks of this document that precede the sentence; restricting
     the comparison to one document keeps this fast :)
  let $pbsBeforeS := $docRoot//pb[. << $s]
  let $pnOfS := count($pbsBeforeS)
  (: position of the sentence on its page: count the sentences between the
     nearest preceding page break and $s. The former "//pb[$pnOfS]" picked
     every pb that is the n-th pb child of its own parent instead of the
     n-th pb of the document. :)
  let $posOfS := count($pbsBeforeS[last()]/following::s[. << $s]) + 1
  let $resultElem :=
    <elem>
      <pos>{$pos}</pos>
      <full-doc>{$fullDocName}</full-doc>
      <name>{$documentName}</name>
      <pn>{$pnOfS}</pn>
      <pos-of-s>{$posOfS}</pos-of-s>
      {$s}
      <links>{$dictLinks}</links>
    </elem>
  order by $author, $pos
  return $resultElem

let $countElems := count($resultElems)
(: ceiling division, but always at least one page; "idiv 10 + 1" produced an
   extra empty page whenever the hit count was a multiple of ten :)
let $countPages := max((1, ($countElems + 9) idiv 10))

(: fall back to page 1 on a missing or non-numeric "pn" and clamp the value
   to the range of existing pages :)
let $pnParam := request:get-parameter("pn", "1")
let $requestedPage :=
  if ($pnParam castable as xs:integer)
  then xs:integer($pnParam)
  else 1
let $pn := max((1, min(($requestedPage, $countPages))))
let $positionFrom :=
  if ($countElems = 0)
  then 0
  else ($pn - 1) * 10 + 1
let $positionTo := min(($pn * 10, $countElems))

(: query string shared by all page links; it carries the document name so
   that paging stays within the document that was searched :)
let $encodedQuery := encode-for-uri($lucene-query)
let $encodedDocument := encode-for-uri($document)
let $pagesURLs :=
  if ($lang = "0")
  then concat("?ft-query=", $encodedQuery, "&amp;document=", $encodedDocument)
  else concat("?ft-lang-query=", $encodedQuery, "&amp;lang=1&amp;language=", encode-for-uri($language), "&amp;document=", $encodedDocument)

let $countPagesURLs :=
  for $i in (1 to $countPages)
  let $pageURL :=
    if ($i = $pn)
    then ($i, " ")
    else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")
  return $pageURL

let $pageResult :=
  for $elem at $pos in $resultElems
  let $hitPos := xs:integer($elem/pos)
  let $pnOfS := xs:integer($elem/pn)
  let $posOfS := xs:integer($elem/pos-of-s)
  let $docName := $elem/name
  let $linkPageQuery := (<a href="page-query-result.xql?document={encode-for-uri($docName)}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
  let $hitLI :=
    <li value="{$hitPos}">
      {$linkPageQuery}, Sentence: {$posOfS}<br></br>
      <b>Sentence: </b>{$elem/s/text()}<br></br>
      <b>Dictionary links: </b>{$elem/links}<br></br>
    </li>
  where $pos >= $positionFrom and $pos <= $positionTo
  return $hitLI

let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)


return
<html>
  <head>
    <title>Result for your query: "{$lucene-query}"</title>
  </head>
  <body>
    <h2>Result of query: "{$lucene-query}"</h2>
    {$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
    <ol>
      {$pageResult}
    </ol>
    <hr></hr>
    <p></p>
    Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
  </body>
</html>