Mercurial > hg > mpdl-group
view software/eXist/webapp/mpdl/_stuff/testDev/kwic.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
line wrap: on
line source
(: module namespace kwic="http://exist-db.org/xquery/kwic"; :)
module namespace kwic="/exist/xquery/archimedes/kwic";

(: Character budget constants. $kwic:CHARS_KWIC is the fallback context width
   used by kwic:get-summary when the config element has no width attribute.
   $kwic:CHARS_SUMMARY is declared for importers; it is not referenced in
   this module. :)
declare variable $kwic:CHARS_SUMMARY := 120;
declare variable $kwic:CHARS_KWIC := 40;

(:~
    Retrieve the text nodes preceding or following a given match.

    Text nodes whose parent is itself an exist:match element (i.e. other
    matches in the surrounding text) are wrapped in <span class="hi">;
    all other text nodes are returned unchanged.

    @param $match the exist:match element marking the match
    @param $mode the selection mode: 'previous' selects the preceding text
        nodes; any other value selects the following text nodes
    @return the selected text nodes, with neighbouring matches wrapped
:)
declare function kwic:get-context($match as element(exist:match), $mode as xs:string) as node()* {
    let $sibs :=
        if ($mode eq 'previous') then
            $match/preceding::text()
        else
            $match/text()/following::text()
    for $sib in $sibs
    return
        if ($sib/parent::exist:match) then
            <span class="hi">{$sib}</span>
        else
            $sib
};

(:~
    Like fn:substring, but takes a node argument. The substring is taken
    from the string value of the node.

    If the node is an element, a new element is created with the same
    node-name as the old one and the shortened text as its only content.
    For any other kind of node the shortened string itself is returned.

    @param $node the node whose string value is shortened
    @param $start 1-based start position, as in fn:substring
    @param $count number of characters to take
    @return an element wrapping the substring, or the substring as a string
:)
declare function kwic:substring($node as node(), $start as xs:int, $count as xs:int) as item()? {
    let $str := substring($node, $start, $count)
    return
        if ($node instance of element()) then
            element { node-name($node) } { $str }
        else
            $str
};

(:~
    Generate the left-hand context of the match.

    Walks $nodes from the last item backwards and accumulates items in front
    of $truncated until adding the next node would exceed $max characters.
    That node is then cut down to its trailing characters so that the text
    fills the budget exactly, and a leading "..." marker is emitted. The
    marker is not counted against $max.

    Note: this function calls itself recursively until $nodes is empty
    or the character budget is used up.

    @param $nodes the candidate context nodes, in document order
    @param $truncated the context accumulated so far
    @param $max the maximum number of context characters
    @param $chars the number of characters already in $truncated
    @return a sequence of nodes and strings forming the left-hand context
:)
declare function kwic:truncate-previous($nodes as node()*, $truncated as item()*,
    $max as xs:int, $chars as xs:int) {
    if ($nodes) then
        let $next := $nodes[last()]
        return
            if ($chars + string-length($next) gt $max) then
                let $remaining := $max - $chars
                return
                    (
                        "...",
                        (: fn:substring is 1-based: the last $remaining characters
                           start at position (length - $remaining + 1). Without the
                           + 1 the character directly before the match was dropped. :)
                        kwic:substring($next, string-length($next) - $remaining + 1, $remaining),
                        $truncated
                    )
            else
                kwic:truncate-previous(
                    subsequence($nodes, 1, count($nodes) - 1),
                    ($next, $truncated),
                    $max,
                    $chars + string-length($next)
                )
    else
        $truncated
};

(:~
    Generate the right-hand context of the match.

    Walks $nodes from the first item forwards and appends items to
    $truncated until adding the next node would exceed $max characters.
    That node is then cut down to its leading characters so that the text
    fills the budget exactly, and a trailing "..." marker is emitted. The
    marker is not counted against $max.

    Note: this function calls itself recursively until $nodes is empty
    or the character budget is used up.

    @param $nodes the candidate context nodes, in document order
    @param $truncated the context accumulated so far
    @param $max the maximum number of context characters
    @param $chars the number of characters already in $truncated
    @return a sequence of nodes and strings forming the right-hand context
:)
declare function kwic:truncate-following($nodes as node()*, $truncated as item()*,
    $max as xs:int, $chars as xs:int) {
    if ($nodes) then
        let $next := $nodes[1]
        return
            if ($chars + string-length($next) gt $max) then
                let $remaining := $max - $chars
                return
                    ($truncated, kwic:substring($next, 1, $remaining), "...")
            else
                kwic:truncate-following(
                    subsequence($nodes, 2),
                    ($truncated, $next),
                    $max,
                    $chars + string-length($next)
                )
    else
        $truncated
};

(:~
    Computes the total string length of the items in the argument sequence.

    @param $nodes the nodes and/or strings to measure
    @return the sum of the string lengths of all items, 0 for the empty sequence
:)
declare function kwic:string-length($nodes as item()*) as xs:int {
    if (exists($nodes)) then
        sum(for $n in $nodes return string-length($n))
    else
        0
};

(:~
    Print a summary of the match in $node. Outputs a limited amount of
    text to the left and the right of the match.

    The context width is read from $config/@width and falls back to
    $kwic:CHARS_KWIC when that attribute is absent. In paragraph mode the
    right-hand side may use whatever part of twice the width the left-hand
    side (including its "..." marker, if any) did not consume; in table
    mode each side gets the plain width.

    @param $root the root element containing the match. This is the original
        element stored in the database. It is not used by this function.
    @param $node the exist:match element to process.
    @param $config configuration element which determines the behaviour of the
        function: width (characters per side), table ('yes' or 'true' to
        produce a tr instead of a p), link (URL the match is linked to)
    @return an XHTML p element, or an XHTML tr element in table mode
:)
declare function kwic:get-summary($root as node(), $node as element(exist:match),
    $config as element(config)) as element() {
    (: Default the width so that a config element without @width still works. :)
    let $chars := xs:int(($config/@width, $kwic:CHARS_KWIC)[1])
    let $table := $config/@table = ('yes', 'true')
    let $prev := kwic:get-context($node, 'previous')
    let $prevTrunc := kwic:truncate-previous($prev, (), $chars, 0)
    let $remain :=
        if (not($table)) then
            $chars * 2 - kwic:string-length($prevTrunc)
        else
            $chars
    let $following := kwic:get-context($node, 'following')
    let $followingTrunc := kwic:truncate-following($following, (), $remain, 0)
    return
        if (not($table)) then
            <p xmlns="http://www.w3.org/1999/xhtml">
                <span class="previous">{$prevTrunc}</span>
                {
                    if ($config/@link) then
                        <a class="hi" href="{$config/@link}">{ $node/text() }</a>
                    else
                        <span class="hi">{ $node/text() }</span>
                }
                <span class="following">{$followingTrunc}</span>
            </p>
        else
            <tr xmlns="http://www.w3.org/1999/xhtml">
                <td class="previous">{$prevTrunc}</td>
                <td class="hi">
                {
                    if ($config/@link) then
                        <a href="{$config/@link}">{$node/text()}</a>
                    else
                        $node/text()
                }
                </td>
                <td class="following">{$followingTrunc}</td>
            </tr>
};

(:~
    Main function of the KWIC module: takes the passed element and returns
    XHTML fragments containing a chunk of text before and after full text
    matches in the node.

    The config parameter is used to configure the behaviour of the function:

    <config width="character width" table="yes|no" link="URL to which the match is linked"/>

    By default, kwic:summarize returns an XHTML fragment with the following
    structure:

    <p xmlns="http://www.w3.org/1999/xhtml">
        <span class="previous">Text before match</span>
        <a href="passed URL if any" class="hi">The highlighted term</a>
        <span class="following">Text after match</span>
    </p>

    If table=yes is passed with the config element, a tr table row will be
    returned instead of a p (using the same class names).

    Note: the path //exist:match[1] applies the positional predicate per
    parent, so one summary is produced for every exist:match that is the
    first exist:match child of its parent element, not only for the first
    match of the whole hit.

    @param $hit an arbitrary XML element which has been selected by one of
        the full text operations or an ngram search.
    @param $config configuration element to configure the behaviour of the
        function
    @return one XHTML element per selected match; empty if there is no match
:)
declare function kwic:summarize($hit as element(), $config as element(config)) as element()* {
    let $expanded := util:expand($hit)
    for $match in $expanded//exist:match[1]
    return
        kwic:get-summary($hit, $match, $config)
};