diff software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-detail.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-detail.xql	Tue Feb 08 15:16:46 2011 +0100
@@ -0,0 +1,234 @@
+xquery version "1.0";
+
+declare namespace request="http://exist-db.org/xquery/request";
+declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util"; 
+declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util"; 
+
+(: Converts a day-time duration into a whole number of milliseconds.
+   $t : the duration to convert, e.g. the difference of two time values.
+   Returns the duration length in milliseconds, rounded to the nearest integer value.
+   All components (days, hours, minutes, seconds) are taken into account, so
+   durations of one hour or more are no longer truncated to their minute/second part. :)
+declare function time-util:duration-as-ms($t) {
+  let $hours := days-from-duration($t) * 24 + hours-from-duration($t)
+  let $minutes := $hours * 60 + minutes-from-duration($t)
+  let $seconds := $minutes * 60 + seconds-from-duration($t)
+  return round($seconds * 1000)
+};
+
+(: Returns the distinct words of the given text.
+   $strInput : the text to split, may be the empty sequence.
+   The splitting itself is delegated to mpdltext:getWords, which receives the
+   pattern "[,;.\s]+" and the flag "i"; the result is de-duplicated with
+   fn:distinct-values.
+   Earlier variant kept for reference: fn:tokenize($strInput, "[,;.\s]+", "i") :)
+declare function string-util:getWords($strInput as xs:string?) as xs:string* {
+  fn:distinct-values(
+    mpdltext:getWords($strInput, "[,;.\s]+", "i")
+  )
+};
+
+(: Separates the given elements with commas.
+   $elems : the elements to separate.
+   Every element except the last one that has at least one text node child is
+   wrapped as <w>element, </w>; the last element and elements without text are
+   returned unchanged. The input order is preserved. :)
+declare function string-util:putCommaBetween($elems as element()*) as element()* {
+  let $total := count($elems)
+  for $item at $idx in $elems
+  return
+    if ($idx < $total and exists($item/text()))
+    then <w>{$item}, </w>
+    else $item
+};
+
+(: Turns words into dictionary links, sorted by their lower-cased form.
+   $words    : the words to link.
+   $language : language code placed into the "lang" field of the dictionary URL;
+               the empty sequence is accepted (the caller in this module declares
+               its own parameter as xs:string?) and yields an empty "lang" value.
+   Returns one <a> element per input word: a link whose text is the lower-cased
+   word, or an empty <a/> placeholder for a zero-length word.
+   The word and the language are percent-encoded so that characters such as
+   "&", "#" or "?" cannot break the structure of the generated URL. :)
+declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
+  let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
+  let $encodedLanguage := fn:encode-for-uri($language)
+  for $word in $words
+  let $lowerCaseWord := fn:lower-case($word)
+  order by $lowerCaseWord
+  return
+    if (string-length($lowerCaseWord) > 0) then
+      <a href="{$dictURLPart1}{fn:encode-for-uri($lowerCaseWord)};lang={$encodedLanguage};pro=echo">{$lowerCaseWord}</a>
+    else
+      <a></a>
+};
+
+(: Builds the comma-separated, sorted list of dictionary links for a text.
+   $strInput : the text whose words are to be linked.
+   $language : language code handed on to string-util:getSortedLinkedWords.
+   Pipeline: getWords, then getSortedLinkedWords, then putCommaBetween. :)
+declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
+  string-util:putCommaBetween(
+    string-util:getSortedLinkedWords(
+      string-util:getWords($strInput),
+      $language
+    )
+  )
+};
+
+(: Returns a fixed in-memory sample document node holding two <product>
+   elements, each with a dept attribute, a <number> and an English <name>.
+   It is not called anywhere in the visible part of this module. :)
+declare function string-util:getDummyDocument() as node() {
+  document {
+    <product dept="ACC"><number>563</number><name language="en">Floppy Sun Hat</name></product>,
+    <product dept="BCC"><number>564</number><name language="en">Floppy SBun Iat</name></product>
+  }
+};
+
+(: Start of the time measurement reported at the bottom of the page. :)
+let $currentTimeBegin := util:system-time()
+
+(: Request parameters with their defaults. :)
+let $lang := request:get-parameter("lang", "0")
+let $language := request:get-parameter("language", "fr")
+let $document := request:get-parameter("document", "")
+
+(: Database path that is searched: "/db/archimedes" when lang is "0",
+   otherwise the language specific path below "/db/arch/". :)
+let $searchRoot :=
+  if ($lang = "0")
+  then "/db/archimedes"
+  else concat("/db/arch/", $language)
+
+(: Without a document parameter the whole collection is searched, otherwise
+   only the single document "<document>.xml" inside that path. :)
+let $archCollection :=
+  if ($document = "")
+  then collection($searchRoot)
+  else doc(concat($searchRoot, "/", $document, ".xml"))
+
+(: The query text comes from "ft-query" when lang is "0" and from
+   "ft-lang-query" otherwise; both fall back to the same sample query. :)
+let $lucene-query :=
+  request:get-parameter(
+    if ($lang = "0") then "ft-query" else "ft-lang-query",
+    "Illuſtriſsimi"
+  )
+
+(: Collects one <elem> per sentence (s element) matching the full text query.
+   Each <elem> carries:
+     pos       position of the hit in the unsorted query result
+     full-doc  database path of the document containing the hit
+     pn        number of pb elements preceding the sentence in its document
+     pos-of-s  1-based position of the sentence after the last preceding pb
+     s         the matching sentence itself
+     links     dictionary links for the words of the sentence
+   The result is ordered by author, then by original hit position. :)
+let $tempResultElems :=
+  for $s at $pos in $archCollection//s[ft:query(., $lucene-query)]
+    let $documentName := util:document-name($s)
+    let $collectionName := util:collection-name($s)
+    let $fullDocName := concat($collectionName, "/", $documentName)
+    let $docRoot := doc($fullDocName)
+    let $sArchInfo := $s/root()/archimedes/info
+    let $author := string($sArchInfo/author/text())
+    (: language of the document; shadows the request parameter inside this loop only :)
+    let $language := string($sArchInfo/lang/text())
+    let $dictLinks := string-util:toSortedLinkedWords($s, $language)
+    (: faster: comparison only in pb elements of this document :)
+    let $pbsBeforeS := $docRoot//pb[. << $s]
+    let $pnOfS := count($pbsBeforeS)
+    (: let $pnOfS := count($s/preceding::pb[util:document-name(root()) = $documentName])  too slow: comparison in pb elements of all found documents  :)
+    (: The last pb before the sentence is taken from the already computed sequence.
+       The former expression $docRoot//pb[$pnOfS] applied the position to the pb
+       children of every parent separately and therefore selected wrong (or no)
+       page breaks as soon as pb elements did not all share one parent.
+       When no pb precedes the sentence the count below is 0 and pos-of-s is 1. :)
+    let $posOfS := count($pbsBeforeS[last()]/following::s[. << $s]) + 1    (: faster: comparison only in s elements of this document :)
+    let $resultElem := 
+      <elem>
+        <pos>{$pos}</pos>
+        <full-doc>{$fullDocName}</full-doc>
+        <pn>{$pnOfS}</pn>
+        <pos-of-s>{$posOfS}</pos-of-s>
+        {$s}
+        <links>{$dictLinks}</links>
+      </elem>
+  order by $author, $pos
+  return $resultElem
+
+(: group by operator: group documents with their hits.
+   For every distinct document path in $tempResultElems one <doc> is built with
+     pos   running number of the document
+     name  document name up to its first "."
+     desc  short description (author, title, place/date) plus a link to the XML
+     hits  one <hit> per sentence found in that document, numbered from 1 :) 
+let $resultElems :=
+  for $full-doc at $docPos in distinct-values($tempResultElems/full-doc)
+    let $docName := substring-before(util:document-name($full-doc), ".")
+    let $sArchInfo := doc($full-doc)/archimedes/info
+    let $author := string($sArchInfo/author/text())
+    (: an empty author is skipped like an empty title instead of leaving a lone ". " :)
+    let $authorLit := if ($author = "") then "" else concat($author, ". ")
+    let $title := string($sArchInfo/title/text())
+    let $titleLit := if ($title = "") then "" else concat($title, ". ") 
+    let $place := string($sArchInfo/place/text())
+    let $date := string($sArchInfo/date/text())
+    (: "place, date. ", or only the part that is present, or nothing at all :)
+    let $placeDate := string-join(($place, $date)[. != ""], ", ")
+    let $placeDateLit := if ($placeDate = "") then "" else concat($placeDate, ". ")
+    let $docShortDesc := ($authorLit, $titleLit, $placeDateLit, " [", <a href="/exist/rest{$full-doc}">XML content</a>, "]")
+    let $docElem := 
+      (: the hits keep the order they have in $tempResultElems :)
+      for $e at $pos in $tempResultElems[full-doc = $full-doc]
+      return 
+        <hit>
+          <pos>{$pos}</pos>
+          {
+            (: data() copies only the values; copying the elements themselves
+               produced nested <pn><pn>..</pn></pn> structures :)
+            <pn>{data($e/pn)}</pn>,
+            <pos-of-s>{data($e/pos-of-s)}</pos-of-s>
+          }
+          {$e/s}
+          {$e/links}
+        </hit>
+  return
+    <doc>
+      <pos>{$docPos}</pos>
+      <name>{$docName}</name>
+      <desc>{$docShortDesc}</desc>
+      <hits>
+        {$docElem}
+      </hits>
+    </doc>
+
+(: Pagination: $pageSize hits are shown per page. :)
+let $pageSize := 10
+let $countElems := count($tempResultElems)
+(: Ceiling division, at least one page. The former "idiv 10 + 1" produced an
+   additional empty page whenever the hit count was a multiple of 10. :)
+let $countPages := max((1, ($countElems + $pageSize - 1) idiv $pageSize))
+
+(: Requested page number; anything that is not an integer falls back to 1 and
+   the value is clamped to the range 1 .. $countPages. :)
+let $pnParam := request:get-parameter("pn", "1")
+let $pnRequested := if ($pnParam castable as xs:integer) then xs:integer($pnParam) else 1
+let $pn := min(($countPages, max((1, $pnRequested))))
+
+(: 1-based positions of the first and last hit of the current page; 0-0 when nothing was found. :)
+let $positionFrom := if ($countElems = 0) then 0 else ($pn - 1) * $pageSize + 1
+let $positionTo := min(($countElems, $pn * $pageSize))
+
+(: Query string shared by all page links. The user supplied values are
+   percent-encoded, and a restriction to a single document is carried along so
+   that the other pages search the same scope as the current one. :)
+let $encodedQuery := encode-for-uri($lucene-query)
+let $documentPart :=
+  if ($document = "")
+  then ""
+  else concat("&amp;document=", encode-for-uri($document))
+let $pagesURLs :=
+  if ($lang = "0")
+  then concat("?ft-query=", $encodedQuery, $documentPart)
+  else concat("?ft-lang-query=", $encodedQuery, "&amp;lang=1&amp;language=", encode-for-uri($language), $documentPart)
+
+(: Page navigation: the current page as plain number, all others as links. :)
+let $countPagesURLs :=
+  for $i in (1 to $countPages)
+  let $pageURL := 
+    if ($i = $pn) 
+    then ($i, " ")
+    else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")
+  return $pageURL
+
+(: Renders the hits of the current page as HTML list items.
+   Not used: subsequence($resultElems/hits/hit, $positionFrom, $positionTo).
+   The third argument of subsequence is a length, not an end position, so the
+   page window is selected by position instead. :)
+let $pageHits := $resultElems/hits/hit     (: all hit elements of all documents :)
+let $pageResult := 
+  for $hit in $pageHits[position() >= $positionFrom and position() <= $positionTo]
+    let $parentDoc := $hit/../..
+    let $hitPos := xs:integer($hit/pos)
+    let $pnOfS := xs:integer($hit/pn)
+    let $posOfS := xs:integer($hit/pos-of-s)
+    let $pageLink := <a href="page-query-result.xql?document={$parentDoc/name}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>
+    let $hitItem :=
+        <li value="{$hitPos}">
+          {$pageLink}, Sentence: {$posOfS}<br></br>
+          <b>Sentence: </b>{data($hit/s)}<br></br>
+          <b>Dictionary links: </b>{$hit/links}<br></br>
+        </li>
+    (: a document with exactly one hit gets an ul element, all others an ol element :)
+    let $listName := if (count($hit/../hit) = 1) then "ul" else "ol"
+    let $hitList := element { $listName } { $hitItem }
+  return
+    (: the first hit of a document additionally carries the document description :)
+    if ($hitPos = 1)
+    then <li value="{xs:integer($parentDoc/pos)}">{$parentDoc/desc}{$hitList}</li>
+    else $hitList
+
+(: Elapsed time since $currentTimeBegin, converted to milliseconds by time-util:duration-as-ms. :)
+let $currentTimeEnd := util:system-time()
+let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)
+
+
+(: Result page: the query text as title and heading, the displayed range with
+   the page navigation, the hit list of the current page, and a footer with the
+   elapsed time and links to the query page and to the XQuery source. :)
+return
+<html>
+ <head>
+  <title>Result for your query: "{$lucene-query}"</title>
+ </head>
+<body>
+<h2>Result of query: "{$lucene-query}"</h2>
+{$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
+<ol>
+{$pageResult}
+</ol>
+<hr></hr>
+<p></p>
+Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
+</body>
+</html>
\ No newline at end of file