Mercurial > hg > mpdl-group
diff software/eXist/webapp/mpdl/_stuff/testDev/kwic.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
line wrap: on
line diff
(:
  Diff header of the original listing, kept for provenance:
    --- /dev/null Thu Jan 01 00:00:00 1970 +0000
    +++ b/software/eXist/webapp/mpdl/_stuff/testDev/kwic.xql Tue Feb 08 15:16:46 2011 +0100
    @@ -0,0 +1,174 @@
:)
(: module namespace kwic="http://exist-db.org/xquery/kwic"; :)
module namespace kwic="/exist/xquery/archimedes/kwic";

(:~
  Width constant that is not referenced inside this module; kept because
  importing modules may read it.
:)
declare variable $kwic:CHARS_SUMMARY := 120;

(:~
  Fallback context width (in characters) used by kwic:get-summary when the
  config element carries no width attribute.
:)
declare variable $kwic:CHARS_KWIC := 40;

(:~
  Retrieve the text nodes surrounding a match.

  For mode "previous" every text node on the preceding axis of the match is
  returned; for any other mode value every text node following the text
  content of the match is returned. Text nodes whose parent is itself an
  exist:match element are wrapped in <span class="hi">...</span> so that
  neighbouring hits stay highlighted.

  @param $match the exist:match element marking the hit
  @param $mode the selection mode: "previous" for the left-hand context,
         anything else (conventionally "following") for the right-hand context
  @return text nodes and span elements in document order
:)
declare function kwic:get-context($match as element(exist:match), $mode as xs:string) as node()* {
    let $sibs :=
        if ($mode eq 'previous') then
            $match/preceding::text()
        else
            $match/text()/following::text()
    for $sib in $sibs
    return
        if ($sib/parent::exist:match) then
            <span class="hi">{$sib}</span>
        else
            $sib
};

(:~
  Like fn:substring, but takes a node argument. If the node is an element,
  a new element with the same name and the same attributes is created around
  the shortened text content; for any other node the shortened string itself
  is returned.

  @param $node the node whose string value is shortened
  @param $start 1-based start position, as in fn:substring
  @param $count number of characters to keep
  @return an element (for element input) or a string
:)
declare function kwic:substring($node as node(), $start as xs:int, $count as xs:int) as item()? {
    let $str := substring($node, $start, $count)
    return
        if ($node instance of element()) then
            (: Copy the attributes as well: without them a truncated
               highlight span would lose its class="hi" marker. :)
            element { node-name($node) } { $node/@*, $str }
        else
            $str
};

(:~
  Generate the left-hand context of the match. Returns a sequence of nodes
  and strings. The accumulated text is at most $max characters long; when
  the context had to be cut, a leading "..." marker is added on top of that.

  Note: this function calls itself recursively, consuming $nodes from the
  end, until $nodes is empty or the next node would exceed $max.

  @param $nodes candidate context nodes in document order
  @param $truncated the items collected so far (pass the empty sequence initially)
  @param $max maximum number of context characters
  @param $chars number of characters collected so far (pass 0 initially)
:)
declare function kwic:truncate-previous($nodes as node()*, $truncated as item()*,
    $max as xs:int, $chars as xs:int) {
    if ($nodes) then
        let $next := $nodes[last()]
        let $length := string-length($next)
        return
            if ($chars + $length gt $max) then
                let $remaining := $max - $chars
                return
                    (: Keep the LAST $remaining characters. fn:substring is
                       1-based, so the start position is length - remaining + 1;
                       starting one earlier would drop the character directly
                       in front of the match. :)
                    ("...", kwic:substring($next, $length - $remaining + 1, $remaining), $truncated)
            else
                kwic:truncate-previous(subsequence($nodes, 1, count($nodes) - 1), ($next, $truncated),
                    $max, $chars + $length)
    else
        $truncated
};

(:~
  Generate the right-hand context of the match. Returns a sequence of nodes
  and strings. The accumulated text is at most $max characters long; when
  the context had to be cut, a trailing "..." marker is added on top of that.

  Note: this function calls itself recursively, consuming $nodes from the
  front, until $nodes is empty or the next node would exceed $max.

  @param $nodes candidate context nodes in document order
  @param $truncated the items collected so far (pass the empty sequence initially)
  @param $max maximum number of context characters
  @param $chars number of characters collected so far (pass 0 initially)
:)
declare function kwic:truncate-following($nodes as node()*, $truncated as item()*, $max as xs:int, $chars as xs:int) {
    if ($nodes) then
        let $next := $nodes[1]
        let $length := string-length($next)
        return
            if ($chars + $length gt $max) then
                let $remaining := $max - $chars
                return
                    ($truncated, kwic:substring($next, 1, $remaining), "...")
            else
                kwic:truncate-following(subsequence($nodes, 2), ($truncated, $next),
                    $max, $chars + $length)
    else
        $truncated
};

(:~
  Computes the total string length of the items in the argument sequence.
  Returns 0 for the empty sequence.
:)
declare function kwic:string-length($nodes as item()*) as xs:int {
    (: fn:sum over the empty sequence is 0, so no separate empty check is needed. :)
    xs:int(sum(for $n in $nodes return string-length($n)))
};

(:~
  Print a summary of the match in $node. Output a limited amount of text to
  the left and the right of the match.

  In paragraph mode the right-hand side may use whatever part of the total
  budget of 2 * width the left-hand side did not consume; in table mode both
  sides get width characters each.

  @param $root the root element containing the match. This is the original element
         stored in the database. It is not used by the current implementation.
  @param $node the exist:match element to process.
  @param $config configuration element which determines the behaviour of the function.
         Attributes read: width (falls back to $kwic:CHARS_KWIC when absent),
         table ("yes" or "true" selects table-row output), link (optional URL).
  @return an XHTML p element, or an XHTML tr element in table mode
:)
declare function kwic:get-summary($root as node(), $node as element(exist:match), $config as element(config)) as element() {
    (: A missing width attribute used to yield the empty sequence here and
       broke the truncation calls; fall back to the module default instead. :)
    let $chars := xs:int(($config/@width, $kwic:CHARS_KWIC)[1])
    let $table := $config/@table = ('yes', 'true')
    let $link := $config/@link
    let $prev := kwic:get-context($node, 'previous')
    let $prevTrunc := kwic:truncate-previous($prev, (), $chars, 0)
    let $remain :=
        if (not($table)) then
            $chars * 2 - kwic:string-length($prevTrunc)
        else
            $chars
    let $following := kwic:get-context($node, 'following')
    let $followingTrunc := kwic:truncate-following($following, (), $remain, 0)
    return
        (: NOTE: the xmlns attribute on the constructors below also changes the
           default element namespace for the enclosed expressions, so do not
           add unprefixed element name tests inside the curly braces. :)
        if (not($table)) then
            <p xmlns="http://www.w3.org/1999/xhtml">
                <span class="previous">{$prevTrunc}</span>
                {
                    if ($link) then
                        <a class="hi" href="{$link}">{ $node/text() }</a>
                    else
                        <span class="hi">{ $node/text() }</span>
                }
                <span class="following">{$followingTrunc}</span>
            </p>
        else
            <tr xmlns="http://www.w3.org/1999/xhtml">
                <td class="previous">{$prevTrunc}</td>
                <td class="hi">
                {
                    if ($link) then
                        <a href="{$link}">{$node/text()}</a>
                    else
                        $node/text()
                }
                </td>
                <td class="following">{$followingTrunc}</td>
            </tr>
};

(:~
  Main function of the KWIC module: takes the passed element and returns
  XHTML fragments containing a chunk of text before and after full text
  matches in the node.

  NOTE(review): the path expression below is //exist:match[1], which selects
  every exist:match element that is the first exist:match child of its parent,
  not only the first match of the whole hit. One fragment is returned per
  selected match. Use ($expanded//exist:match)[1] if only the very first match
  is wanted; this was left unchanged because callers may depend on receiving
  several fragments.

  The config parameter is used to configure the behaviour of the function:

  <config width="character width" table="yes|no" link="URL to which the match is linked"/>

  By default, kwic:summarize returns an XHTML fragment with the following structure:

  <p xmlns="http://www.w3.org/1999/xhtml">
      <span class="previous">Text before match</span>
      <a href="passed URL if any" class="hi">The highlighted term</a>
      <span class="following">Text after match</span>
  </p>

  If table=yes is passed with the config element, a tr table row with td cells
  will be returned instead of a p with spans (using the same class names).

  @param $hit an arbitrary XML element which has been selected by one of the full text
         operations or an ngram search.
  @param $config configuration element to configure the behaviour of the function
  @return zero or more XHTML p or tr elements, one per selected match
:)
declare function kwic:summarize($hit as element(), $config as element(config))
as element()* {
    (: util:expand produces a copy of the hit in which matches are marked
       with exist:match elements. :)
    let $expanded := util:expand($hit)
    for $match in $expanded//exist:match[1]
    return
        kwic:get-summary($hit, $match, $config)
};