diff software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql	Tue Feb 08 15:16:46 2011 +0100
@@ -0,0 +1,168 @@
+xquery version "1.0";
+
+declare namespace request="http://exist-db.org/xquery/request";
+declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util"; 
+declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util"; 
+
+(:~
+ : Converts a duration to whole milliseconds.
+ :
+ : The day, hour, minute and second components are all taken into account,
+ : so durations of one hour or longer are not under-reported.
+ :
+ : @param $t the duration to convert (e.g. the difference of two xs:time values)
+ : @return the length of $t in milliseconds, rounded to the nearest whole number
+ :)
+declare function time-util:duration-as-ms($t) {
+  let $hours := days-from-duration($t) * 24 + hours-from-duration($t)
+  let $minutes := $hours * 60 + minutes-from-duration($t)
+  let $seconds := $minutes * 60 + seconds-from-duration($t)
+  return round($seconds * 1000)
+};
+
+(:~
+ : Returns the distinct words of a string.
+ :
+ : The input is handed to mpdltext:getWords together with the delimiter
+ : pattern "[,;.\s]+" and the flag "i"; duplicates are removed from its result.
+ : NOTE(review): mpdltext:getWords is not defined in this file - presumably it
+ : behaves like fn:tokenize with the same arguments; confirm against the module.
+ :
+ : @param $strInput the text to split, may be empty
+ : @return the distinct values returned by mpdltext:getWords
+ :)
+declare function string-util:getWords($strInput as xs:string?) as xs:string* {
+  fn:distinct-values(
+    mpdltext:getWords($strInput, "[,;.\s]+", "i")
+  )
+};
+
+(:~
+ : Appends ", " to every element except the last one.
+ :
+ : An element that is not last and has at least one text node child is wrapped
+ : in a w element followed by the text ", "; all other elements are returned
+ : unchanged.
+ :
+ : @param $elems the elements to separate
+ : @return one item per input element, in the same order
+ :)
+declare function string-util:putCommaBetween($elems as element()*) as element()* {
+  let $last := count($elems)
+  for $item at $idx in $elems
+  return
+    if ($idx < $last and exists($item/text()))
+    then <w>{$item}, </w>
+    else $item
+};
+
+(:~
+ : Turns words into dictionary links, sorted by their lower-cased form.
+ :
+ : Each non-empty word becomes an a element whose href points to the dictionary
+ : URL with the lower-cased word and the language as query values; an empty
+ : word becomes an empty a element without href. The word and the language are
+ : percent-encoded so that reserved or non-ASCII characters cannot break the URL.
+ :
+ : @param $words the words to link
+ : @param $language the language code put into the link, may be empty
+ : @return one a element per input word, ordered by lower-cased word
+ :)
+declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
+  let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
+  for $word in $words
+  let $lowerCaseWord := fn:lower-case($word)
+  order by $lowerCaseWord
+  return
+    if (string-length($lowerCaseWord) > 0) then
+      <a href="{$dictURLPart1}{fn:encode-for-uri($lowerCaseWord)};lang={fn:encode-for-uri($language)};pro=echo">{$lowerCaseWord}</a>
+    else
+      <a></a>
+};
+
+(:~
+ : Builds the comma-separated list of dictionary links for a text.
+ :
+ : Pipeline: string-util:getWords, then string-util:getSortedLinkedWords, then
+ : string-util:putCommaBetween.
+ :
+ : @param $strInput the text whose words are linked, may be empty
+ : @param $language the language code passed on to the link builder
+ : @return the linked words with separators applied
+ :)
+declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
+  string-util:putCommaBetween(
+    string-util:getSortedLinkedWords(string-util:getWords($strInput), $language)
+  )
+};
+
+(:~
+ : Returns a fixed sample document node holding two product elements.
+ : It is not called anywhere in the visible part of this file.
+ :
+ : @return a document node with the two hard-coded product entries
+ :)
+declare function string-util:getDummyDocument() as node() {
+  document {
+    <product dept="ACC"><number>563</number><name language="en">Floppy Sun Hat</name></product>,
+    <product dept="BCC"><number>564</number><name language="en">Floppy SBun Iat</name></product>
+  }
+};
+
+(:
+  Main query body.
+
+  Request parameters read below:
+    lang           "0" (default) searches /db/archimedes, any other value searches /db/arch/<language>
+    language       language collection used when lang is not "0" (default "fr")
+    document       document name without the ".xml" suffix
+    ft-query       Lucene query used when lang is "0"
+    ft-lang-query  Lucene query used otherwise
+    pn             1-based result page number (default 1)
+
+  Flow: run the Lucene query on the s elements of the selected document, build
+  one elem record per hit, sort the records by author, then render one page of
+  ten hits as HTML together with paging links and the elapsed time.
+:)
+let $currentTimeBegin := util:system-time()
+let $lang := request:get-parameter("lang", "0")
+let $language := request:get-parameter("language", "fr")
+let $document := request:get-parameter("document", "alber_archi_003_en_1755")
+
+let $tempArchimedesDocPath :=
+  if ($lang = "0")
+  then "/db/archimedes"
+  else concat("/db/arch/", $language)
+let $archimedesDocPath := concat($tempArchimedesDocPath, "/", $document, ".xml")
+let $archDoc := doc($archimedesDocPath)
+
+let $lucene-query := 
+  if ($lang = "0")
+  then request:get-parameter("ft-query", "Illuſtriſsimi")
+  else request:get-parameter("ft-lang-query", "Illuſtriſsimi")
+
+let $resultElems :=
+  for $s at $pos in $archDoc//s[ft:query(., $lucene-query)]
+    let $documentName := util:document-name($s)
+    let $collectionName := util:collection-name($s)
+    let $fullDocName := concat($collectionName, "/", $documentName)
+    let $docRoot := doc($fullDocName)
+    let $sArchInfo := $s/root()/archimedes/info
+    let $author := string($sArchInfo/author/text())
+    let $language := string($sArchInfo/lang/text())
+    let $dictLinks := string-util:toSortedLinkedWords($s, $language)
+    (: page number of the hit = number of pb elements of this document that precede it :)
+    let $pnOfS := count($docRoot//pb[. << $s])
+    (: The parentheses matter: the predicate must index the document-wide pb
+       sequence. Without them, //pb[n] selects every pb that is the n-th pb
+       child of its own parent. :)
+    let $lastPb := ($docRoot//pb)[$pnOfS]
+    (: position of the hit among the s elements after that page break; a hit
+       that precedes every pb is counted from the start of the document :)
+    let $posOfS :=
+      if (exists($lastPb))
+      then count($lastPb/following::s[. << $s]) + 1
+      else count($docRoot//s[. << $s]) + 1
+    let $resultElem := 
+      <elem>
+        <pos>{$pos}</pos>
+        <full-doc>{$fullDocName}</full-doc>
+        <name>{$documentName}</name>
+        <pn>{$pnOfS}</pn>
+        <pos-of-s>{$posOfS}</pos-of-s>
+        {$s}
+        <links>{$dictLinks}</links>
+      </elem>
+  order by $author, $pos
+  return $resultElem
+
+let $pageSize := 10
+let $countElems := count($resultElems)
+(: ceiling division; always at least one page so that an empty result still renders :)
+let $countPages := max((1, ($countElems + $pageSize - 1) idiv $pageSize))
+
+(: a missing, non-numeric or out-of-range pn falls back to the nearest valid page :)
+let $pnParam := request:get-parameter("pn", "1")[1]
+let $pnRequested :=
+  if ($pnParam castable as xs:integer)
+  then xs:integer($pnParam)
+  else 1
+let $pn := min(($countPages, max((1, $pnRequested))))
+let $positionFrom := ($pn - 1) * $pageSize + 1
+let $positionTo := min(($pn * $pageSize, $countElems))
+
+(: Paging links carry every parameter needed to repeat the same search,
+   including the document; the values are percent-encoded. :)
+let $encodedQuery := encode-for-uri($lucene-query)
+let $encodedDocument := encode-for-uri($document)
+let $pagesURLs :=
+  if ($lang = "0")
+  then concat("?ft-query=", $encodedQuery, "&amp;document=", $encodedDocument)
+  else concat("?ft-lang-query=", $encodedQuery, "&amp;lang=1&amp;language=", encode-for-uri($language), "&amp;document=", $encodedDocument)
+
+let $countPagesURLs :=
+  for $i in (1 to $countPages)
+  let $pageURL := 
+    if ($i = $pn) 
+    then ($i, " ")
+    else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")
+  return $pageURL
+
+let $pageResult := 
+  for $elem at $pos in $resultElems
+    let $hitPos := xs:integer($elem/pos)
+    let $pnOfS := xs:integer($elem/pn)
+    let $posOfS := xs:integer($elem/pos-of-s)
+    let $docName := $elem/name
+    let $linkPageQuery := (<a href="page-query-result.xql?document={$docName}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
+    let $hitLI :=
+        <li value="{$hitPos}">
+          {$linkPageQuery}, Sentence: {$posOfS}<br></br>
+          <b>Sentence: </b>{$elem/s/text()}<br></br>
+          <b>Dictionary links: </b>{$elem/links}<br></br>
+        </li>
+  where $pos >= $positionFrom and $pos <= $positionTo
+  return $hitLI 
+
+let $currentTimeEnd := util:system-time()
+let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)
+
+
+return
+<html>
+ <head>
+  <title>Result for your query: "{$lucene-query}"</title>
+ </head>
+<body>
+<h2>Result of query: "{$lucene-query}"</h2>
+{$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
+<ol>
+{$pageResult}
+</ol>
+<hr></hr>
+<p></p>
+Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
+</body>
+</html>
\ No newline at end of file