Mercurial > hg > mpdl-group
comparison software/eXist/webapp/mpdl/_stuff/testDev/kwic.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:2396a569e446 | 7:5589d865af7a |
---|---|
(: Upstream namespace, kept for reference:
   module namespace kwic="http://exist-db.org/xquery/kwic"; :)
module namespace kwic="/exist/xquery/archimedes/kwic";

(: Character-count constants of the module
   (presumably widths for summary and KWIC output; confirm against callers). :)
declare variable $kwic:CHARS_SUMMARY := 120;
declare variable $kwic:CHARS_KWIC := 40;
6 | |
(:~
    Collect the text nodes on one side of a match.

    With $mode = "previous", all text nodes on the preceding axis of $match are
    selected; with any other value, the text nodes on the following axis of the
    match's own text are selected. A text node whose parent is an exist:match
    element is returned wrapped in <span class="hi">; every other text node is
    returned unchanged.

    @param $match the exist:match element whose surroundings are wanted
    @param $mode  "previous" for the left-hand side; anything else selects the right-hand side
    @return the selected text nodes (or highlighting spans) in document order
:)
declare function kwic:get-context($match as element(exist:match), $mode as xs:string) as node()* {
    let $wantPrevious := $mode eq 'previous'
    let $textNodes :=
        if ($wantPrevious) then
            $match/preceding::text()
        else
            $match/text()/following::text()
    return
        for $t in $textNodes
        return
            if (exists($t/parent::exist:match)) then
                <span class="hi">{$t}</span>
            else
                $t
};
26 | |
(:~
    Node-aware variant of fn:substring.

    The string value of $node is cut with fn:substring($node, $start, $count).
    For an element node, the result is a new element carrying the same name and
    the shortened text as its only content; for any other node, the shortened
    string itself is returned.

    @param $node  the node whose string value is shortened
    @param $start 1-based start position, as in fn:substring
    @param $count number of characters to keep
:)
declare function kwic:substring($node as node(), $start as xs:int, $count as xs:int) as item()? {
    typeswitch ($node)
        case element() return
            element { node-name($node) } { substring($node, $start, $count) }
        default return
            substring($node, $start, $count)
};
40 | |
(:~
    Build the left-hand context of a match.

    Nodes are consumed from the end of $nodes (the node closest to the match)
    towards the start and prepended to $truncated. As soon as adding the next
    node would exceed $max characters, only the trailing part of that node
    that still fits is kept, an ellipsis string "..." is put in front, and the
    recursion stops. The node text kept never exceeds $max characters; the
    "..." marker is additional.

    The function calls itself recursively until $nodes is exhausted or the
    character budget is used up.

    @param $nodes     candidate context nodes in document order
    @param $truncated items already accepted (accumulator; pass () initially)
    @param $max       maximum number of characters of node text to return
    @param $chars     number of characters already accepted (pass 0 initially)
    @return the accepted items in document order, preceded by "..." if text was cut
:)
declare function kwic:truncate-previous($nodes as node()*, $truncated as item()*,
    $max as xs:int, $chars as xs:int) {
    if ($nodes) then
        let $next := $nodes[last()]
        let $len := string-length($next)
        return
            if ($chars + $len gt $max) then
                let $remaining := $max - $chars
                return
                    (: fn:substring positions are 1-based: the last $remaining
                       characters of $next start at $len - $remaining + 1.
                       Starting one position earlier would drop the character
                       directly in front of the match. :)
                    ("...", kwic:substring($next, $len - $remaining + 1, $remaining), $truncated)
            else
                kwic:truncate-previous(subsequence($nodes, 1, count($nodes) - 1), ($next, $truncated),
                    $max, $chars + $len)
    else
        $truncated
};
63 | |
(:~
    Build the right-hand context of a match.

    Nodes are taken from the front of $nodes and appended to $truncated while
    the running character count stays within $max. The first node that would
    exceed the budget is cut to the characters that still fit (via
    kwic:substring), an ellipsis string "..." is appended, and the recursion stops.

    The function calls itself recursively until $nodes is exhausted or the
    character budget is used up.

    @param $nodes     candidate context nodes in document order
    @param $truncated items already accepted (accumulator; pass () initially)
    @param $max       maximum number of characters of node text to return
    @param $chars     number of characters already accepted (pass 0 initially)
    @return the accepted items in document order, followed by "..." if text was cut
:)
declare function kwic:truncate-following($nodes as node()*, $truncated as item()*, $max as xs:int, $chars as xs:int) {
    if (empty($nodes)) then
        $truncated
    else
        let $head := $nodes[1]
        let $total := $chars + string-length($head)
        return
            if ($total le $max) then
                kwic:truncate-following(subsequence($nodes, 2), ($truncated, $head),
                    $max, $total)
            else
                ($truncated, kwic:substring($head, 1, $max - $chars), "...")
};
85 | |
(:~
    Total string length of a sequence of items.

    @param $nodes the items (nodes or atomic values) to measure
    @return the sum of string-length() over all items; 0 for the empty sequence
:)
declare function kwic:string-length($nodes as item()*) as xs:int {
    (: the second argument is the value sum() returns for an empty input :)
    sum((for $item in $nodes return string-length($item)), 0)
};
95 | |
(:~
    Render one match together with a limited amount of text to its left and right.

    The context width is read from $config/@width; if that attribute is absent
    the module constant $kwic:CHARS_KWIC is used. In paragraph mode the
    right-hand side may use whatever part of twice the width the left-hand
    side did not consume; in table mode both sides are limited to the width.

    Output, in the XHTML namespace:
      - paragraph mode (default): <p> with <span class="previous">, the match as
        <a class="hi" href="..."> or <span class="hi">, and <span class="following">
      - table mode (@table is "yes" or "true"): <tr> with <td class="previous">,
        <td class="hi"> and <td class="following">
    The match is rendered as a link only when $config/@link is present.

    @param $root   the root element containing the match; accepted for interface
                   compatibility, not used by this function
    @param $node   the exist:match element to process
    @param $config configuration element (attributes: width, table, link)
    @return a single p or tr element
:)
declare function kwic:get-summary($root as node(), $node as element(exist:match), $config as element(config)) as element() {
    (: Fall back to the module default when no width is configured: xs:int(())
       would be the empty sequence, which the xs:int parameters of the
       truncate functions below do not accept. :)
    let $chars := xs:int(($config/@width, $kwic:CHARS_KWIC)[1])
    let $table := $config/@table = ('yes', 'true')
    let $prev := kwic:get-context($node, 'previous')
    let $prevTrunc := kwic:truncate-previous($prev, (), $chars, 0)
    let $remain :=
        if (not($table)) then
            (: paragraph mode: hand unused left-hand budget to the right-hand side :)
            $chars * 2 - kwic:string-length($prevTrunc)
        else
            $chars
    let $following := kwic:get-context($node, 'following')
    let $followingTrunc := kwic:truncate-following($following, (), $remain, 0)
    return
        if (not($table)) then
            <p xmlns="http://www.w3.org/1999/xhtml">
                <span class="previous">{$prevTrunc}</span>
                {
                    if ($config/@link) then
                        <a class="hi" href="{$config/@link}">{ $node/text() }</a>
                    else
                        <span class="hi">{ $node/text() }</span>
                }
                <span class="following">{$followingTrunc}</span>
            </p>
        else
            <tr xmlns="http://www.w3.org/1999/xhtml">
                <td class="previous">{$prevTrunc}</td>
                <td class="hi">
                {
                    if ($config/@link) then
                        <a href="{$config/@link}">{$node/text()}</a>
                    else
                        $node/text()
                }
                </td>
                <td class="following">{$followingTrunc}</td>
            </tr>
};
143 | |
(:~
    Entry point of the KWIC module: expands the passed element with util:expand
    and returns one XHTML fragment per selected match, each showing a chunk of
    text before and after that match.

    Matches are selected with the path //exist:match[1], i.e. every exist:match
    element that is the first exist:match child of its parent. More than one
    fragment can therefore be returned for a single hit.

    The config element controls the output:

        <config width="character width" table="yes|no" link="URL to which the match is linked"/>

    By default each fragment has the following structure:

        <p xmlns="http://www.w3.org/1999/xhtml">
            <span class="previous">Text before match</span>
            <a href="passed URL if any" class="hi">The highlighted term</a>
            <span class="following">Text after match</span>
        </p>

    If table="yes" is set on the config element, a tr table row with td cells
    is returned instead of the p element (using the same class names).

    @param $hit    an arbitrary XML element which has been selected by one of the
                   full text operations or an ngram search
    @param $config configuration element; the signature requires exactly one
    @return zero or more p or tr elements, one per selected match
:)
declare function kwic:summarize($hit as element(), $config as element(config))
as element()* {
    let $firstMatches := util:expand($hit)//exist:match[1]
    for $m in $firstMatches
    return
        kwic:get-summary($hit, $m, $config)
};