Mercurial > hg > mpdl-group
diff software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children |
line wrap: on
line diff
(:
 : fulltext-query-result-in-one-doc.xql
 :
 : Reconstructed from a changeset view in which the whole file was added:
 :   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
 :   +++ b/software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql Tue Feb 08 15:16:46 2011 +0100
 :   @@ -0,0 +1,168 @@
 :
 : Runs ft:query over the s elements of a single document, chosen through the
 : request parameters "lang", "language" and "document", and renders the hits
 : as an HTML page showing ten hits per page.
 :
 : NOTE(review): the prefixes util:, ft: and mpdltext: are used without a
 : declaration in this file; presumably the eXist configuration binds them.
 : Confirm in particular where mpdltext: comes from.
 : NOTE(review): "document" and "language" come straight from the request and
 : are concatenated into the path given to doc(); confirm that this is
 : acceptable for the deployment.
 :)
xquery version "1.0";

declare namespace request="http://exist-db.org/xquery/request";
declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util";
declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util";

(:~
 : Converts a duration to a rounded number of milliseconds.
 :
 : Days and hours are included, so a duration of one hour or more is no
 : longer truncated to its minutes and seconds part.
 :
 : @param $t the duration to convert
 : @return the rounded millisecond count, or the empty sequence if $t is empty
 :)
declare function time-util:duration-as-ms($t) {
    let $hours := days-from-duration($t) * 24 + hours-from-duration($t)
    let $minutes := $hours * 60 + minutes-from-duration($t)
    let $seconds := $minutes * 60 + seconds-from-duration($t)
    return round($seconds * 1000)
};

(:~
 : Splits a string into its distinct words via mpdltext:getWords.
 :
 : @param $strInput the text to split
 : @return the distinct values returned by mpdltext:getWords
 :)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
    (: runs of commas, semicolons, full stops and whitespace separate words :)
    let $wordDelim := "[,;.\s]+"
    (: let $words := fn:tokenize($strInput, $wordDelim, "i") :)
    let $words := mpdltext:getWords($strInput, $wordDelim, "i")
    return fn:distinct-values($words)
};

(:~
 : Wraps every element except the last in a w element followed by ", ".
 : Elements without a text node child are passed through unchanged.
 :
 : @param $elems the elements to separate
 : @return the elements, comma separated where applicable
 :)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
    let $count := count($elems)
    for $elem at $pos in $elems
    return
        if ($pos < $count and exists($elem/text()))
        then <w>{$elem}, </w>
        else $elem
};

(:~
 : Turns words into dictionary links, sorted by their lower-cased form.
 : A zero-length word yields an empty a element.
 :
 : @param $words the words to link
 : @param $language value of the "lang" field in the dictionary URL; it may
 :        be empty, which matches what string-util:toSortedLinkedWords passes
 : @return one a element per input word
 :)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
    let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
    for $word in $words
    let $lowerCaseWord := fn:lower-case($word)
    order by $lowerCaseWord
    return
        if (string-length($lowerCaseWord) > 0)
        then <a href="{$dictURLPart1}{$lowerCaseWord};lang={$language};pro=echo">{$lowerCaseWord}</a>
        else <a></a>
};

(:~
 : Splits a text into distinct words and returns them as sorted,
 : comma-separated dictionary links.
 :
 : @param $strInput the text to split
 : @param $language the language passed on to the dictionary links
 : @return the linked words
 :)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
    let $words := string-util:getWords($strInput)
    let $wordsWithLinks := string-util:getSortedLinkedWords($words, $language)
    return string-util:putCommaBetween($wordsWithLinks)
};

(:~
 : Builds a small in-memory document holding two product elements.
 : It is not called anywhere in this file.
 :
 : @return the constructed document node
 :)
declare function string-util:getDummyDocument() as node() {
    document {
        element product {
            attribute dept { "ACC" },
            element number { 563 },
            element name { attribute language {"en"}, "Floppy Sun Hat"}
        },
        element product {
            attribute dept { "BCC" },
            element number { 564 },
            element name { attribute language {"en"}, "Floppy SBun Iat"}
        }
    }
};

let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "alber_archi_003_en_1755")

(: lang = "0" selects /db/archimedes, anything else /db/arch/<language> :)
let $tempArchimedesDocPath :=
    if ($lang = "0")
    then "/db/archimedes"
    else concat("/db/arch/", $language)
let $archimedesDocPath := concat($tempArchimedesDocPath, "/", $document, ".xml")
let $archDoc := doc($archimedesDocPath)

let $lucene-query :=
    if ($lang = "0")
    then request:get-parameter("ft-query", "Illuſtriſsimi")
    else request:get-parameter("ft-lang-query", "Illuſtriſsimi")

let $resultElems :=
    for $s at $pos in $archDoc//s[ft:query(., $lucene-query)]
    let $documentName := util:document-name($s)
    let $collectionName := util:collection-name($s)
    let $fullDocName := concat($collectionName, "/", $documentName)
    (: the document that owns the hit; avoids opening it again by name :)
    let $docRoot := root($s)
    let $sArchInfo := $docRoot/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $docLanguage := string($sArchInfo/lang/text())
    let $dictLinks := string-util:toSortedLinkedWords($s, $docLanguage)
    (: only pb elements of this document are compared, which keeps it fast :)
    let $pbsBeforeS := $docRoot//pb[. << $s]
    let $pnOfS := count($pbsBeforeS)
    (: The page break that opens the page of $s. Writing $docRoot//pb[$pnOfS]
       would apply the position test to each parent's pb children separately
       instead of to the pb elements of the whole document. :)
    let $pagePb := $pbsBeforeS[last()]
    let $posOfS :=
        if (exists($pagePb))
        then count($pagePb/following::s[. << $s]) + 1
        (: no page break precedes $s, so count from the start of the document :)
        else count($docRoot//s[. << $s]) + 1
    let $resultElem :=
        <elem>
            <pos>{$pos}</pos>
            <full-doc>{$fullDocName}</full-doc>
            <name>{$documentName}</name>
            <pn>{$pnOfS}</pn>
            <pos-of-s>{$posOfS}</pos-of-s>
            {$s}
            <links>{$dictLinks}</links>
        </elem>
    order by $author, $pos
    return $resultElem

let $pageSize := 10
let $countElems := count($resultElems)
(: round up, so that an exact multiple of the page size gets no empty extra
   page; at least one page is always reported :)
let $countPages := max((1, ($countElems + $pageSize - 1) idiv $pageSize))

(: a missing, repeated or non-numeric "pn" falls back to page 1, and the
   value is clamped to the pages that exist :)
let $pnParam := request:get-parameter("pn", "1")
let $pnRequested :=
    if ($pnParam castable as xs:integer)
    then xs:integer($pnParam)
    else 1
let $pn := max((1, min(($pnRequested, $countPages))))
let $positionFrom :=
    if ($countElems = 0)
    then 0
    else ($pn - 1) * $pageSize + 1
let $positionTo := min(($pn * $pageSize, $countElems))

(: the query is percent-encoded so that spaces, "&" and non-ASCII characters
   survive inside the paging links :)
let $pagesURLs :=
    if ($lang = "0")
    then concat("?ft-query=", encode-for-uri($lucene-query))
    else concat("?ft-lang-query=", encode-for-uri($lucene-query), "&amp;lang=1&amp;language=", encode-for-uri($language))

let $countPagesURLs :=
    for $i in (1 to $countPages)
    let $pageURL :=
        if ($i = $pn)
        then ($i, " ")
        else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")
    return $pageURL

let $pageResult :=
    for $elem at $pos in $resultElems
    let $hitPos := xs:integer($elem/pos)
    let $pnOfS := xs:integer($elem/pn)
    let $posOfS := xs:integer($elem/pos-of-s)
    let $docName := encode-for-uri(string($elem/name))
    let $linkPageQuery := (<a href="page-query-result.xql?document={$docName}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
    let $hitLI :=
        <li value="{$hitPos}">
            {$linkPageQuery}, Sentence: {$posOfS}<br></br>
            <b>Sentence: </b>{$elem/s/text()}<br></br>
            <b>Dictionary links: </b>{$elem/links}<br></br>
        </li>
    where $pos >= $positionFrom and $pos <= $positionTo
    return $hitLI

let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)


return
<html>
    <head>
        <title>Result for your query: "{$lucene-query}"</title>
    </head>
<body>
<h2>Result of query: "{$lucene-query}"</h2>
{$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
<ol>
{$pageResult}
</ol>
<hr></hr>
<p></p>
Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
</body>
</html>