Mercurial > hg > mpdl-group
diff software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-detail.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
line wrap: on
line diff
xquery version "1.0";

(:
   Detailed result page for a Lucene full-text query over the Archimedes
   documents stored in eXist.

   Request parameters read by this module:
     lang           "0" (default) searches /db/archimedes, any other value
                    searches /db/arch/{language}
     language       language sub-collection, default "fr"
     document       optional document name (without ".xml") to restrict the search
     ft-query       Lucene query used when lang = "0"
     ft-lang-query  Lucene query used otherwise
     pn             1-based result page number, default 1 (10 hits per page)

   The prefixes util and ft are not declared here; presumably they are the
   eXist built-in modules that the server pre-declares. TODO confirm.
:)

declare namespace request = "http://exist-db.org/xquery/request";
declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util";
declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util";

(:
   Converts a duration into rounded milliseconds.
   Hours, minutes and seconds are taken into account; the days component,
   if any, is ignored.
:)
declare function time-util:duration-as-ms($t) {
    round(
        (hours-from-duration($t) * 3600
         + minutes-from-duration($t) * 60
         + seconds-from-duration($t)) * 1000
    )
};

(:
   Splits $strInput into words and returns the distinct words.
   NOTE(review): the mpdltext prefix is not declared in this file; presumably
   it is provided by a module registered in the eXist configuration. Verify.
:)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
    let $wordDelim := "[,;.\s]+"
    (: alternative without the extension function:
       fn:tokenize($strInput, $wordDelim, "i") :)
    let $words := mpdltext:getWords($strInput, $wordDelim, "i")
    return fn:distinct-values($words)
};

(:
   Wraps every element except the last one in <w>elem, </w> so that the
   rendered list is comma separated. Elements without a text node child
   are passed through unchanged.
:)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
    let $count := count($elems)
    for $elem at $pos in $elems
    return
        if ($pos < $count and exists($elem/text()))
        then <w>{$elem}, </w>
        else $elem
};

(:
   Turns each word into a lower-cased dictionary link (an <a> element) and
   returns the links sorted by the lower-cased word. A zero-length word
   yields an empty <a> element.
   $language is accepted as xs:string? because the only caller in this
   file passes an optional string.
:)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
    let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
    for $word in $words
    let $lowerCaseWord := fn:lower-case($word)
    order by $lowerCaseWord
    return
        if (string-length($lowerCaseWord) > 0)
        then <a href="{$dictURLPart1}{$lowerCaseWord};lang={$language};pro=echo">{$lowerCaseWord}</a>
        else <a></a>
};

(:
   Splits $strInput into distinct words and returns them as sorted,
   comma separated dictionary links.
:)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
    let $words := string-util:getWords($strInput)
    let $wordsWithLinks := string-util:getSortedLinkedWords($words, $language)
    return string-util:putCommaBetween($wordsWithLinks)
};

(:
   Builds a small in-memory sample document with two product elements.
   It is not referenced by the main query below.
:)
declare function string-util:getDummyDocument() as node() {
    document {
        element product {
            attribute dept { "ACC" },
            element number { 563 },
            element name { attribute language {"en"}, "Floppy Sun Hat"}
        },
        element product {
            attribute dept { "BCC" },
            element number { 564 },
            element name { attribute language {"en"}, "Floppy SBun Iat"}
        }
    }
};

let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "")

(: collection to search: the general one or the language specific one :)
let $tempArchimedesDocPath :=
    if ($lang = "0")
    then "/db/archimedes"
    else concat("/db/arch/", $language)

let $archimedesDocPath :=
    if ($document = "")
    then $tempArchimedesDocPath
    else concat($tempArchimedesDocPath, "/", $document, ".xml")

(: either the whole collection or the single requested document :)
let $archCollection :=
    if ($document = "")
    then collection($archimedesDocPath)
    else doc($archimedesDocPath)

let $lucene-query :=
    if ($lang = "0")
    then request:get-parameter("ft-query", "Illuſtriſsimi")
    else request:get-parameter("ft-lang-query", "Illuſtriſsimi")

(: one <elem> per matching sentence, ordered by author and hit position :)
let $tempResultElems :=
    for $s at $pos in $archCollection//s[ft:query(., $lucene-query)]
    let $documentName := util:document-name($s)
    let $collectionName := util:collection-name($s)
    let $fullDocName := concat($collectionName, "/", $documentName)
    let $docRoot := doc($fullDocName)
    let $sArchInfo := $s/root()/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $docLanguage := string($sArchInfo/lang/text())
    let $dictLinks := string-util:toSortedLinkedWords($s, $docLanguage)
    (: Only the pb elements of the sentence's own document are compared;
       going through preceding::pb over all found documents was too slow. :)
    let $pbsBeforeS := $docRoot//pb[. << $s]
    let $pnOfS := count($pbsBeforeS)
    (: Sentence position on its page: count the s elements between the last
       page break before the sentence and the sentence itself.
       $docRoot//pb[$n] would select every pb that is the n-th pb child of
       its parent, not the n-th pb of the document, so the last preceding
       pb is taken from the already computed sequence instead. :)
    let $posOfS := count($pbsBeforeS[last()]/following::s[. << $s]) + 1
    order by $author, $pos
    return
        <elem>
            <pos>{$pos}</pos>
            <full-doc>{$fullDocName}</full-doc>
            <pn>{$pnOfS}</pn>
            <pos-of-s>{$posOfS}</pos-of-s>
            {$s}
            <links>{$dictLinks}</links>
        </elem>

(: group by: one <doc> per document, containing all of its hits :)
let $resultElems :=
    for $full-doc at $docPos in distinct-values($tempResultElems/full-doc)
    let $docName := substring-before(util:document-name($full-doc), ".")
    let $docPath := doc($full-doc)
    let $sArchInfo := $docPath/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $authorLit := concat($author, ". ")
    let $title := string($sArchInfo/title/text())
    let $titleLit := if ($title = "") then "" else concat($title, ". ")
    let $place := string($sArchInfo/place/text())
    let $date := string($sArchInfo/date/text())
    let $placeDateLit :=
        if ($place = "" and $date = "") then ""
        else if ($place != "" and $date = "") then concat($place, ". ")
        else if ($place = "" and $date != "") then concat($date, ". ")
        else concat($place, ", ", $date, ". ")
    let $docShortDesc := ($authorLit, $titleLit, $placeDateLit, " [", <a href="/exist/rest{$full-doc}">XML content</a>, "]")
    let $sInDocElem := $tempResultElems[full-doc = $full-doc]
    let $docElem :=
        for $e at $pos in $sInDocElem
        let $pnOfS := $e/pn
        let $posOfS := $e/pos-of-s
        order by $pos
        return
            <hit>
                <pos>{$pos}</pos>
                <pn>{$pnOfS}</pn>
                <pos-of-s>{$posOfS}</pos-of-s>
                {$e/s}
                {$e/links}
            </hit>
    return
        <doc>
            <pos>{$docPos}</pos>
            <name>{$docName}</name>
            <desc>{$docShortDesc}</desc>
            <hits>
                {$docElem}
            </hits>
        </doc>

(: paging: 10 hits per page :)
let $countElems := count($tempResultElems)
(: Round up, so an exact multiple of 10 does not produce an empty extra
   page; there is always at least one page. :)
let $countPages := max((1, ($countElems + 9) idiv 10))

(: A missing, non-numeric or out-of-range pn falls back to a valid page
   instead of raising a cast error. :)
let $pnParam := request:get-parameter("pn", "1")
let $requestedPn :=
    if ($pnParam castable as xs:integer)
    then xs:integer($pnParam)
    else 1
let $pn := min((max((1, $requestedPn)), $countPages))
let $positionFrom := (($pn - 1) * 10) + 1
let $positionTo := min(($pn * 10, $countElems))

(: Base URL of the paging links. The query is URL-encoded because Lucene
   queries may contain characters such as + or the ampersand, and the
   document restriction is carried over so that later pages show the same
   result set. :)
let $encodedQuery := encode-for-uri($lucene-query)
let $documentParam :=
    if ($document = "")
    then ""
    else concat("&amp;document=", encode-for-uri($document))
let $pagesURLs :=
    if ($lang = "0")
    then concat("?ft-query=", $encodedQuery, $documentParam)
    else concat("?ft-lang-query=", $encodedQuery, "&amp;lang=1&amp;language=", encode-for-uri($language), $documentParam)

(: page navigation: the current page as plain number, all others as links :)
let $countPagesURLs :=
    for $i in (1 to $countPages)
    return
        if ($i = $pn)
        then ($i, " ")
        else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")

(: All hits are fetched and the page window is applied with the where
   clause below. An earlier attempt with
   subsequence($resultElems/hits/hit, $positionFrom, $positionTo) did not
   work correctly; note that the third argument of fn:subsequence is a
   length, not an end position. :)
let $pageHits := $resultElems/hits/hit
let $pageResult :=
    for $hit at $pos in $pageHits
    let $hitFatherDoc := $hit/../..
    let $countHitsFatherDoc := count($hit/../hit)
    let $hitPos := xs:integer($hit/pos)
    let $pnOfS := xs:integer($hit/pn)
    let $posOfS := xs:integer($hit/pos-of-s)
    let $docName := $hitFatherDoc/name
    let $linkPageQuery := (<a href="page-query-result.xql?document={$docName}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
    let $hitFatherPos := xs:integer($hitFatherDoc/pos)
    let $hitInnerLI :=
        <li value="{$hitPos}">
            {$linkPageQuery}, Sentence: {$posOfS}<br/>
            <b>Sentence: </b>{data($hit/s)}<br/>
            <b>Dictionary links: </b>{$hit/links}<br/>
        </li>
    (: a document with a single hit gets an ul element, otherwise an ol :)
    let $hitLI :=
        if ($countHitsFatherDoc = 1)
        then
            <ul>
                {$hitInnerLI}
            </ul>
        else
            <ol>
                {$hitInnerLI}
            </ol>
    (: NOTE(review): the document description is only emitted together with
       the first hit of a document, so a page that starts in the middle of a
       document's hits shows those hits without their document header. :)
    let $hitLiFatherLI :=
        if ($hitPos = 1)
        then
            <li value="{$hitFatherPos}">{$hitFatherDoc/desc}
                {$hitLI}
            </li>
        else $hitLI
    where $pos >= $positionFrom and $pos <= $positionTo
    return $hitLiFatherLI

let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)

return
<html>
    <head>
        <title>Result for your query: "{$lucene-query}"</title>
    </head>
    <body>
        <h2>Result of query: "{$lucene-query}"</h2>
        {$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
        <ol>
            {$pageResult}
        </ol>
        <hr/>
        <p></p>
        Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
    </body>
</html>