Mercurial > hg > mpdl-group
view software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-detail.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
line wrap: on
line source
xquery version "1.0";

(:~
 : Full-text query result page.
 :
 : Runs a Lucene full-text query (ft:query) over the s elements of either a
 : whole collection or a single document, groups the hits by document and
 : renders one page (10 hits) of the result as HTML.
 :
 : Request parameters read by this script:
 :   lang          "0" selects /db/archimedes and the "ft-query" parameter,
 :                 any other value selects /db/arch/{language} and the
 :                 "ft-lang-query" parameter
 :   language      sub-collection name used when lang != "0" (default "fr")
 :   document      optional document name (without ".xml") to restrict the search
 :   ft-query /
 :   ft-lang-query the Lucene query string
 :   pn            1-based result page number (default 1)
 :
 : NOTE(review): the prefixes util, ft and mpdltext are used below but are not
 : declared in this file; presumably they are registered in the eXist
 : configuration - confirm before deploying elsewhere.
 :)

declare namespace request = "http://exist-db.org/xquery/request";
declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util";
declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util";

(:~
 : Converts a duration to milliseconds, rounded to a whole number.
 : Hours, minutes and seconds are taken into account.
 :
 : @param $t the duration to convert
 : @return the duration in milliseconds
 :)
declare function time-util:duration-as-ms($t) {
    round(
        ((hours-from-duration($t) * 60 + minutes-from-duration($t)) * 60
            + seconds-from-duration($t)) * 1000
    )
};

(:~
 : Splits a string into its distinct words.
 :
 : @param $strInput the text to split
 : @return the distinct values returned by mpdltext:getWords
 :)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
    let $wordDelim := "[,;.\s]+"
    (: let $words := fn:tokenize($strInput, $wordDelim, "i") :)
    let $words := mpdltext:getWords($strInput, $wordDelim, "i")
    return fn:distinct-values($words)
};

(:~
 : Wraps every element except the last one in a w element that appends ", ".
 : Elements without a text node child are passed through unchanged.
 :
 : @param $elems the elements to separate
 : @return the elements, comma separated
 :)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
    let $count := count($elems)
    for $elem at $pos in $elems
    return
        if ($pos < $count and exists($elem/text()))
        then <w>{$elem}, </w>
        else $elem
};

(:~
 : Turns words into dictionary links, sorted by the lower-cased word.
 : An empty word yields an empty a element.
 :
 : @param $words the words to link
 : @param $language the language code put into the "lang" URL parameter
 : @return one a element per word
 :)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
    let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
    for $word in $words
    let $lowerCaseWord := fn:lower-case($word)
    order by $lowerCaseWord
    return
        if (string-length($lowerCaseWord) > 0)
        then <a href="{$dictURLPart1}{$lowerCaseWord};lang={$language};pro=echo">{$lowerCaseWord}</a>
        else <a/>
};

(:~
 : Splits a text into distinct words and returns them as sorted,
 : comma separated dictionary links.
 :
 : @param $strInput the text whose words are linked
 : @param $language the language code for the dictionary links
 : @return the linked words
 :)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
    let $words := string-util:getWords($strInput)
    let $wordsWithLinks := string-util:getSortedLinkedWords($words, $language)
    return string-util:putCommaBetween($wordsWithLinks)
};

(:~
 : Builds a small in-memory sample document with two product elements.
 : Not called by the main query in this file.
 :
 : @return the sample document node
 :)
declare function string-util:getDummyDocument() as node() {
    let $bla :=
        document {
            element product {
                attribute dept { "ACC" },
                element number { 563 },
                element name { attribute language {"en"}, "Floppy Sun Hat"}
            },
            element product {
                attribute dept { "BCC" },
                element number { 564 },
                element name { attribute language {"en"}, "Floppy SBun Iat"}
            }
        }
    return $bla
};

let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "")
let $tempArchimedesDocPath :=
    if ($lang = "0")
    then "/db/archimedes"
    else concat("/db/arch/", $language)
let $archimedesDocPath :=
    if ($document = "")
    then $tempArchimedesDocPath
    else concat($tempArchimedesDocPath, "/", $document, ".xml")
let $archCollection :=
    if ($document = "")
    then collection($archimedesDocPath)
    else doc($archimedesDocPath)
let $lucene-query :=
    if ($lang = "0")
    then request:get-parameter("ft-query", "Illuſtriſsimi")
    else request:get-parameter("ft-lang-query", "Illuſtriſsimi")

(: one elem per hit, sorted by author and then by hit order :)
let $tempResultElems :=
    for $s at $pos in $archCollection//s[ft:query(., $lucene-query)]
    let $documentName := util:document-name($s)
    let $collectionName := util:collection-name($s)
    let $fullDocName := concat($collectionName, "/", $documentName)
    let $docRoot := doc($fullDocName)
    let $sArchInfo := $s/root()/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $language := string($sArchInfo/lang/text())
    let $dictLinks := string-util:toSortedLinkedWords($s, $language)
    (: page breaks of this document that precede the hit; only pb elements of
       this one document are compared, which is faster than preceding::pb :)
    let $pbsBeforeS := $docRoot//pb[. << $s]
    let $pnOfS := count($pbsBeforeS)
    (: Position of the sentence on its page: s elements between the last
       preceding page break and the hit. "//pb[$n]" would select the n-th pb
       child of every parent rather than the n-th pb of the document, so the
       last preceding pb is taken from the already computed sequence. :)
    let $posOfS := count($pbsBeforeS[last()]/following::s[. << $s]) + 1
    order by $author, $pos
    return
        <elem>
            <pos>{$pos}</pos>
            <full-doc>{$fullDocName}</full-doc>
            <pn>{$pnOfS}</pn>
            <pos-of-s>{$posOfS}</pos-of-s>
            {$s}
            <links>{$dictLinks}</links>
        </elem>

(: group by operator: group documents with their hits :)
let $resultElems :=
    for $full-doc at $docPos in distinct-values($tempResultElems/full-doc)
    let $docName := substring-before(util:document-name($full-doc), ".")
    let $docPath := doc($full-doc)
    let $sArchInfo := $docPath/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $authorLit := concat($author, ". ")
    let $title := string($sArchInfo/title/text())
    let $titleLit := if ($title = "") then "" else concat($title, ". ")
    let $place := string($sArchInfo/place/text())
    let $date := string($sArchInfo/date/text())
    let $placeDateLit :=
        if ($place = "" and $date = "") then ""
        else if ($place != "" and $date = "") then concat($place, ". ")
        else if ($place = "" and $date != "") then concat($date, ". ")
        else concat($place, ", ", $date, ". ")
    let $docShortDesc :=
        ($authorLit, $titleLit, $placeDateLit, " [", <a href="/exist/rest{$full-doc}">XML content</a>, "]")
    let $sInDocElem := $tempResultElems[full-doc = $full-doc]
    let $docElem :=
        for $e at $pos in $sInDocElem
        let $pnOfS := $e/pn
        let $posOfS := $e/pos-of-s
        order by $pos
        return
            <hit>
                <pos>{$pos}</pos>
                <pn>{$pnOfS}</pn>
                <pos-of-s>{$posOfS}</pos-of-s>
                {$e/s}
                {$e/links}
            </hit>
    return
        <doc>
            <pos>{$docPos}</pos>
            <name>{$docName}</name>
            <desc>{$docShortDesc}</desc>
            <hits>
                {$docElem}
            </hits>
        </doc>

(: paging: 10 hits per page :)
let $countElems := count($tempResultElems)
(: ceiling division; at least one page so that an empty result still renders :)
let $countPages := max((1, ($countElems + 9) idiv 10))
(: a missing, repeated or non-numeric pn falls back to page 1;
   out-of-range values are clamped to the existing pages :)
let $pnParam := request:get-parameter("pn", "1")[1]
let $pn :=
    if ($pnParam castable as xs:integer)
    then max((1, min((xs:integer($pnParam), $countPages))))
    else 1
let $positionFrom := ($pn - 1) * 10 + 1
let $positionTo := min(($pn * 10, $countElems))
(: the query text and the other filters are percent-encoded so that
   characters such as "&", "#" or spaces do not break the paging links :)
let $documentURLPart :=
    if ($document = "")
    then ""
    else concat("&amp;document=", encode-for-uri($document))
let $pagesURLs :=
    if ($lang = "0")
    then concat("?ft-query=", encode-for-uri($lucene-query), $documentURLPart)
    else concat("?ft-lang-query=", encode-for-uri($lucene-query),
                "&amp;lang=1&amp;language=", encode-for-uri($language), $documentURLPart)
let $countPagesURLs :=
    for $i in (1 to $countPages)
    return
        if ($i = $pn)
        then ($i, " ")
        else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")

(: hits of the requested page; the third argument of subsequence is a
   length, not an end position :)
let $pageHits :=
    subsequence($resultElems/hits/hit, $positionFrom, $positionTo - $positionFrom + 1)
let $pageResult :=
    for $hit in $pageHits
    let $hitFatherDoc := $hit/../..
    let $countHitsFatherDoc := count($hit/../hit)
    let $hitPos := xs:integer($hit/pos)
    let $pnOfS := xs:integer($hit/pn)
    let $posOfS := xs:integer($hit/pos-of-s)
    let $docName := $hitFatherDoc/name
    let $linkPageQuery :=
        (<a href="page-query-result.xql?document={$docName}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
    let $hitFatherPos := xs:integer($hitFatherDoc/pos)
    let $hitInnerLI :=
        <li value="{$hitPos}">
            {$linkPageQuery}, Sentence: {$posOfS}<br></br>
            <b>Sentence: </b>{data($hit/s)}<br></br>
            <b>Dictionary links: </b>{$hit/links}<br></br>
        </li>
    (: if only one hit for the document exists then an ul element is used else an ol element :)
    let $hitLI :=
        if ($countHitsFatherDoc = 1)
        then <ul>{$hitInnerLI}</ul>
        else <ol>{$hitInnerLI}</ol>
    (: the first hit of a document also carries the document description :)
    return
        if ($hitPos = 1)
        then <li value="{$hitFatherPos}">{$hitFatherDoc/desc} {$hitLI}</li>
        else $hitLI

let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)
return
    <html>
        <head>
            <title>Result for your query: "{$lucene-query}"</title>
        </head>
        <body>
            <h2>Result of query: "{$lucene-query}"</h2>
            {$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
            <ol>
                {$pageResult}
            </ol>
            <hr></hr>
            <p></p>
            Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
        </body>
    </html>