comparison software/eXist/webapp/mpdl/_stuff/testDev/kwic.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
comparison
equal deleted inserted replaced
6:2396a569e446 7:5589d865af7a
1 (: module namespace kwic="http://exist-db.org/xquery/kwic"; :)
2 module namespace kwic="/exist/xquery/archimedes/kwic"; (: namespace URI bound to the kwic prefix of this library module; the line above keeps an alternative URI commented out :)
3
4 declare variable $kwic:CHARS_SUMMARY := 120; (: presumably the character budget for a longer summary; TODO confirm against callers :)
5 declare variable $kwic:CHARS_KWIC := 40; (: presumably the number of context characters shown on each side of a match; TODO confirm against callers :)
6
(:~
    Collect the text nodes that surround a match, in document order.

    For mode "previous" every text node on the preceding axis of the match
    element is selected; for any other mode value every text node on the
    following axis of the match's own text is selected. A selected text node
    whose parent is itself an exist:match element is returned wrapped in
    a span element with class "hi"; all other text nodes are returned as is.

    @param $match the exist:match element marking the hit
    @param $mode the selection mode: "previous" for the left-hand context,
        anything else (conventionally "following") for the right-hand context
    @return the selected text nodes and highlight spans
:)
declare function kwic:get-context($match as element(exist:match), $mode as xs:string) as node()* {
    let $context :=
        if ($mode ne 'previous') then
            $match/text()/following::text()
        else
            $match/preceding::text()
    for $text in $context
    return
        if (exists($text/parent::exist:match)) then
            <span class="hi">{ $text }</span>
        else
            $text
};
26
(:~
    Like fn:substring, but takes a node argument.

    If the node is an element, a new element is created with the same
    node-name and the same attributes as the old one, holding only the
    shortened text content. Copying the attributes keeps a truncated
    highlight span (class "hi") recognisable as a highlight. For any other
    kind of node the shortened string itself is returned.

    @param $node the node whose string value is shortened
    @param $start 1-based position of the first character to keep
    @param $count number of characters to keep
    @return an element for element input, otherwise a string
:)
declare function kwic:substring($node as node(), $start as xs:int, $count as xs:int) as item()? {
    let $str := substring($node, $start, $count)
    return
        if ($node instance of element()) then
            (: attributes must come before the text content in the constructor :)
            element { node-name($node) } { $node/@*, $str }
        else
            $str
};
40
(:~
    Generate the left-hand context of the match. Returns a sequence of nodes
    and strings whose total string length, not counting the "..." marker,
    is less than or equal to $max characters.

    The nodes are consumed from the end of $nodes, i.e. starting with the one
    closest to the match. When the next node no longer fits, only its trailing
    characters are kept and the result is prefixed with "...".

    Note: this function calls itself recursively until $nodes is empty or
    the returned sequence has the desired total string length.

    @param $nodes the context nodes preceding the match, in document order
    @param $truncated the part of the result accumulated so far
    @param $max the maximum number of context characters to return
    @param $chars the number of characters already held in $truncated
    @return the accumulated context, possibly starting with "..."
:)
declare function kwic:truncate-previous($nodes as node()*, $truncated as item()*,
    $max as xs:int, $chars as xs:int) {
    if ($nodes) then
        let $next := $nodes[last()]
        let $length := string-length($next)
        return
            if ($chars + $length gt $max) then
                let $remaining := $max - $chars
                return
                    (: fn:substring is 1-based: the last $remaining characters
                       start at position $length - $remaining + 1 :)
                    ("...", kwic:substring($next, $length - $remaining + 1, $remaining), $truncated)
            else
                kwic:truncate-previous(subsequence($nodes, 1, count($nodes) - 1), ($next, $truncated),
                    $max, $chars + $length)
    else
        $truncated
};
63
(:~
    Build the right-hand context of the match: a sequence of nodes and
    strings whose total string length, not counting the "..." marker,
    does not exceed $max characters.

    Nodes are taken from the front of $nodes. As soon as the next node would
    exceed the budget, only its leading characters are kept and "..." is
    appended. The function recurses until $nodes is exhausted or the budget
    is used up.

    @param $nodes the context nodes following the match, in document order
    @param $truncated the part of the result accumulated so far
    @param $max the maximum number of context characters to return
    @param $chars the number of characters already held in $truncated
    @return the accumulated context, possibly ending with "..."
:)
declare function kwic:truncate-following($nodes as node()*, $truncated as item()*, $max as xs:int, $chars as xs:int) {
    if (empty($nodes)) then
        $truncated
    else
        let $head := $nodes[1]
        let $total := $chars + string-length($head)
        return
            if ($total le $max) then
                kwic:truncate-following(subsequence($nodes, 2), ($truncated, $head),
                    $max, $total)
            else
                ($truncated, kwic:substring($head, 1, $max - $chars), "...")
};
85
(:~
    Add up the string lengths of all items in the argument sequence.

    @param $nodes the nodes and strings to measure
    @return the summed length, or 0 for the empty sequence
:)
declare function kwic:string-length($nodes as item()*) as xs:int {
    (: the second argument of fn:sum is the value returned for an empty input :)
    sum((for $item in $nodes return string-length($item)), 0)
};
95
(:~
    Print a summary of the match in $node. Output a predefined amount of text to
    the left and the right of the match.

    The width is read from the width attribute of $config; when that attribute
    is absent, $kwic:CHARS_KWIC is used instead of failing with a type error.
    In paragraph mode the right-hand context may use whatever part of the
    doubled width the left-hand context (including its "..." marker) did not
    consume; in table mode both sides get the plain width.

    @param $root the root element containing the match. This is the original element
        stored in the database. It is not used by this function.
    @param $node the exist:match element to process.
    @param $config configuration element which determines the behaviour of the function:
        width (context characters), table ("yes" or "true" selects a tr row),
        link (URL the highlighted match is linked to)
    @return an XHTML p element, or an XHTML tr element in table mode
:)
declare function kwic:get-summary($root as node(), $node as element(exist:match), $config as element(config)) as element() {
    (: fall back to the module default when no width is configured :)
    let $chars := xs:int(($config/@width, $kwic:CHARS_KWIC)[1])
    let $table := $config/@table = ('yes', 'true')
    let $prev := kwic:get-context($node, 'previous')
    let $prevTrunc := kwic:truncate-previous($prev, (), $chars, 0)
    let $remain :=
        if (not($table)) then
            $chars * 2 - kwic:string-length($prevTrunc)
        else
            $chars
    let $following := kwic:get-context($node, 'following')
    let $followingTrunc := kwic:truncate-following($following, (), $remain, 0)
    return
        if (not($table)) then
            <p xmlns="http://www.w3.org/1999/xhtml">
                <span class="previous">{$prevTrunc}</span>
                {
                    if ($config/@link) then
                        <a class="hi" href="{$config/@link}">{ $node/text() }</a>
                    else
                        <span class="hi">{ $node/text() }</span>
                }
                <span class="following">{$followingTrunc}</span>
            </p>
        else
            <tr xmlns="http://www.w3.org/1999/xhtml">
                <td class="previous">{$prevTrunc}</td>
                <td class="hi">
                {
                    if ($config/@link) then
                        <a href="{$config/@link}">{$node/text()}</a>
                    else
                        $node/text()
                }
                </td>
                <td class="following">{$followingTrunc}</td>
            </tr>
};
143
(:~
    Main function of the KWIC module: takes the passed element and returns
    XHTML fragments containing a chunk of text before and after a full text
    match in the node.

    The element is first passed through util:expand (an eXist extension
    function; presumably it returns a copy of $hit in which the matches are
    wrapped in exist:match elements). One summary is then produced for every
    exist:match element that is the first exist:match child of its parent.

    The config parameter is used to configure the behaviour of the function:

    &lt;config width="character width" table="yes|no" link="URL to which the match is linked"/&gt;

    By default, kwic:summarize returns an XHTML fragment with the following structure:

    &lt;p xmlns="http://www.w3.org/1999/xhtml"&gt;
        &lt;span class="previous"&gt;Text before match&lt;/span&gt;
        &lt;a href="passed URL if any" class="hi"&gt;The highlighted term&lt;/a&gt;
        &lt;span class="following"&gt;Text after match&lt;/span&gt;
    &lt;/p&gt;

    If table=yes is passed with the config element, a tr table row is returned
    instead of the p element (using the same class names on its td cells).

    @param $hit an arbitrary XML element which has been selected by one of the full text
        operations or an ngram search.
    @param $config configuration element to configure the behaviour of the function
    @return one p or tr element per selected match
:)
declare function kwic:summarize($hit as element(), $config as element(config))
    as element()* {
    for $match in util:expand($hit)//exist:match[1]
    return
        kwic:get-summary($hit, $match, $config)
};