view software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-detail.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
line wrap: on
line source

xquery version "1.0";

declare namespace request="http://exist-db.org/xquery/request";
declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util"; 
declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util"; 

(:~
 : Converts a duration to a whole number of milliseconds.
 :
 : All components of the duration (days, hours, minutes and seconds,
 : including fractional seconds) contribute to the result; the earlier
 : implementation only looked at minutes and seconds, so any duration of
 : one hour or more was under-reported.
 :
 : @param $t the duration to convert (e.g. the difference of two xs:time values)
 : @return the duration in milliseconds, rounded to the nearest integer value;
 :         the empty sequence if $t is the empty sequence
 :)
declare function time-util:duration-as-ms($t) {
  round(
    (
      ((days-from-duration($t) * 24 + hours-from-duration($t)) * 60
        + minutes-from-duration($t)) * 60
      + seconds-from-duration($t)
    ) * 1000
  )
};

(:~
 : Returns the distinct words of a string.
 :
 : The input is handed to mpdltext:getWords together with a delimiter
 : pattern (runs of commas, semicolons, dots and whitespace) and the flag "i";
 : duplicates are then removed from whatever that call returns.
 : NOTE(review): the mpdltext prefix is not declared in the visible prolog;
 : presumably it is bound elsewhere or by the server - confirm.
 : (An earlier variant used fn:tokenize with the same arguments.)
 :
 : @param $strInput the text to split, may be empty
 : @return the distinct words
 :)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
  fn:distinct-values(
    mpdltext:getWords($strInput, "[,;.\s]+", "i")
  )
};

(:~
 : Separates a sequence of elements with commas for display.
 :
 : Every element except the last one is wrapped in a w element together
 : with a trailing ", " - but only if the element has at least one text node
 : child. Elements without text, and the last element, are passed through
 : unchanged.
 :
 : @param $elems the elements to separate
 : @return one item per input element, in the same order
 :)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
  let $lastPos := count($elems)
  for $current at $idx in $elems
  return
    if ($idx < $lastPos and exists($current/text()))
    then <w>{$current}, </w>
    else $current
};

(:~
 : Turns words into dictionary links, sorted alphabetically.
 :
 : Each word is lower-cased and rendered as an a element whose href points
 : to the Archimedes dictionary CGI for that word and language. A word that
 : is empty after lower-casing yields an empty a element instead. The result
 : is ordered by the lower-cased word.
 :
 : @param $words the words to link
 : @param $language the language code placed in the "lang" part of the URL
 : @return one a element per input word
 :)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string) as element()* {
  let $dictBase := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
  for $entry in $words
  let $key := fn:lower-case($entry)
  order by $key
  return
    if (string-length($key) > 0)
    then <a href="{$dictBase}{$key};lang={$language};pro=echo">{$key}</a>
    else <a></a>
};

(:~
 : Builds the comma-separated list of dictionary links for a piece of text.
 :
 : The text is split into distinct words (string-util:getWords), the words
 : are turned into sorted links (string-util:getSortedLinkedWords) and the
 : links are finally separated by commas (string-util:putCommaBetween).
 :
 : @param $strInput the text whose words should be linked
 : @param $language the language code passed on to the link builder
 : @return the linked words, comma separated
 :)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
  string-util:putCommaBetween(
    string-util:getSortedLinkedWords(
      string-util:getWords($strInput),
      $language
    )
  )
};

(:~
 : Returns a small hard-coded document node containing two product
 : elements (dept ACC / number 563 and dept BCC / number 564), each with
 : an English name.
 :
 : @return the constructed document node
 :)
declare function string-util:getDummyDocument() as node() {
  document {
    <product dept="ACC">{
      <number>563</number>,
      <name language="en">Floppy Sun Hat</name>
    }</product>,
    <product dept="BCC">{
      <number>564</number>,
      <name language="en">Floppy SBun Iat</name>
    }</product>
  }
};

(: Start of the main query: remember the start time, read the request
   parameters and decide where to search and which query string to use.
   NOTE(review): "language" and "document" come straight from the request
   and are concatenated into database paths without any check - confirm
   that this is acceptable for the deployment. :)
let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "")
let $searchWholeCollection := ($document = "")

(: lang = "0" searches the general collection, anything else the collection of the given language :)
let $tempArchimedesDocPath :=
  if ($lang = "0") then "/db/archimedes" else concat("/db/arch/", $language)

(: without a document name the whole collection is searched, otherwise only that document :)
let $archimedesDocPath :=
  if ($searchWholeCollection)
  then $tempArchimedesDocPath
  else concat($tempArchimedesDocPath, "/", $document, ".xml")

let $archCollection :=
  if ($searchWholeCollection)
  then collection($archimedesDocPath)
  else doc($archimedesDocPath)

(: the query string lives in a different request parameter depending on lang :)
let $lucene-query :=
  request:get-parameter(
    if ($lang = "0") then "ft-query" else "ft-lang-query",
    "Illuſtriſsimi"
  )

(: Run the full-text query on all s elements of the search scope and build
   one flat elem record per hit, ordered by author and then by hit position.
   Each record carries: running hit position, full document path, number of
   page breaks (pb) before the hit, position of the hit sentence after the
   last of those page breaks, the s element itself and its dictionary links. :)
let $tempResultElems :=
  for $s at $pos in $archCollection//s[ft:query(., $lucene-query)]
    let $documentName := util:document-name($s)
    let $collectionName := util:collection-name($s)
    let $fullDocName := concat($collectionName, "/", $documentName)
    let $docRoot := doc($fullDocName)
    let $sArchInfo := $s/root()/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $language := string($sArchInfo/lang/text())
    let $dictLinks := string-util:toSortedLinkedWords($s, $language)
    (: Only the pb elements of the hit's own document are compared; going
       through preceding::pb across all found documents proved too slow. :)
    let $pbsBeforeS := $docRoot//pb[. << $s]
    let $pnOfS := count($pbsBeforeS)
    (: The last page break before the hit is taken from the document-ordered
       list of preceding pb elements. Writing $docRoot//pb[$pnOfS] would apply
       the position test per parent element instead of document-wide and pick
       the wrong page break(s) whenever pb elements have different parents.
       Without any preceding pb the count is 0 and the position becomes 1. :)
    let $posOfS := count($pbsBeforeS[last()]/following::s[. << $s]) + 1
    let $resultElem := 
      <elem>
        <pos>{$pos}</pos>
        <full-doc>{$fullDocName}</full-doc>
        <pn>{$pnOfS}</pn>
        <pos-of-s>{$posOfS}</pos-of-s>
        {$s}
        <links>{$dictLinks}</links>
      </elem>
  order by $author, $pos
  return $resultElem

(: Grouping step: collect the flat hit records per document. Every distinct
   document path becomes one doc element holding its running position, its
   name without extension, a short bibliographic description and its hits. :)
let $resultElems :=
  for $full-doc at $docPos in distinct-values($tempResultElems/full-doc)
    let $info := doc($full-doc)/archimedes/info
    let $author := string($info/author/text())
    let $title := string($info/title/text())
    let $place := string($info/place/text())
    let $date := string($info/date/text())
    let $titleLit := if ($title = "") then "" else concat($title, ". ")
    (: place and date: either one alone, both joined by a comma, or nothing :)
    let $placeDateLit :=
      if ($place = "" and $date = "") then ""
      else if ($date = "") then concat($place, ". ")
      else if ($place = "") then concat($date, ". ")
      else concat($place, ", ", $date, ". ")
    let $docShortDesc := (
      concat($author, ". "),
      $titleLit,
      $placeDateLit,
      " [",
      <a href="/exist/rest{$full-doc}">XML content</a>,
      "]"
    )
    (: hits of this document, renumbered from 1 within the document :)
    let $docElem :=
      for $e at $pos in $tempResultElems[full-doc = $full-doc]
      order by $pos
      return
        <hit>
          <pos>{$pos}</pos>
          <pn>{$e/pn}</pn>
          <pos-of-s>{$e/pos-of-s}</pos-of-s>
          {$e/s}
          {$e/links}
        </hit>
  return
    <doc>
      <pos>{$docPos}</pos>
      <name>{substring-before(util:document-name($full-doc), ".")}</name>
      <desc>{$docShortDesc}</desc>
      <hits>
        {$docElem}
      </hits>
    </doc>

(: Paging: hits are shown in pages of $pageSize entries. :)
let $countElems := count($tempResultElems)
let $pageSize := 10
(: Ceiling division, with at least one page. The former "idiv 10 + 1"
   produced an extra empty page whenever the hit count was a multiple of 10. :)
let $countPages := max((1, ($countElems + $pageSize - 1) idiv $pageSize))

(: Requested page number: anything that is not an integer falls back to 1
   instead of failing the integer cast, and the value is clamped to the
   range of existing pages. :)
let $pnParam := request:get-parameter("pn", "1")[1]
let $pnRequested := if ($pnParam castable as xs:integer) then xs:integer($pnParam) else 1
let $pn := max((1, min(($pnRequested, $countPages))))
let $positionFrom := ($pn - 1) * $pageSize + 1
let $positionTo := min(($pn * $pageSize, $countElems))

(: Base URL of the page links. Query and language are percent-encoded so
   that characters such as "+", "&", "#" or blanks in a query survive the
   round trip through the link. :)
let $encodedQuery := encode-for-uri($lucene-query)
let $pagesURLs :=
  if ($lang = "0")
  then concat("?ft-query=", $encodedQuery)
  else concat("?ft-lang-query=", $encodedQuery, "&amp;lang=1&amp;language=", encode-for-uri($language))

(: One entry per page: the current page as plain number, all others as links. :)
let $countPagesURLs :=
  for $i in (1 to $countPages)
  return
    if ($i = $pn)
    then ($i, " ")
    else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")

(: Render the hits of the current page. All hit elements are fetched and the
   page window is applied with the where clause below. An earlier attempt,
   subsequence($resultElems/hits/hit, $positionFrom, $positionTo), did not
   work correctly - likely because the third argument of subsequence is a
   length, not an end position. :)
let $pageHits := $resultElems/hits/hit
let $pageResult := 
  for $hit at $pos in $pageHits
    let $parentDoc := $hit/../..
    let $hitPos := xs:integer($hit/pos)
    let $pnOfS := xs:integer($hit/pn)
    let $hitItem :=
        <li value="{$hitPos}">
          <a href="page-query-result.xql?document={$parentDoc/name}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>, Sentence: {xs:integer($hit/pos-of-s)}<br></br>
          <b>Sentence: </b>{data($hit/s)}<br></br>
          <b>Dictionary links: </b>{$hit/links}<br></br>
        </li>
    (: a document with exactly one hit gets an unordered list, otherwise an ordered one :)
    let $hitList :=
      element { if (count($hit/../hit) = 1) then "ul" else "ol" } { $hitItem }
    (: the first hit of a document is additionally wrapped in the document's
       own list item, which carries the document description :)
    let $hitOutput :=
      if ($hitPos = 1)
      then
        <li value="{xs:integer($parentDoc/pos)}">{$parentDoc/desc}
          {$hitList}
        </li>
      else
        $hitList
  where $pos >= $positionFrom and $pos <= $positionTo
  return $hitOutput

(: Stop the clock: the elapsed time is the difference between this sample and
   the one taken in $currentTimeBegin, converted by time-util:duration-as-ms.
   NOTE(review): both samples come from util:system-time(); if that yields a
   time of day without a date, a request spanning midnight would give a
   negative duration - confirm. :)
let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)


(: Final result: the HTML page with the query as title and heading, the
   displayed range and total number of hits, the page links, the list of
   hits, the elapsed time and navigation links.
   NOTE(review): the "XQuery source" link targets fulltext-query-result.xql;
   confirm that this is the intended file for this page. :)
return
<html>
 <head>
  <title>Result for your query: "{$lucene-query}"</title>
 </head>
<body>
<h2>Result of query: "{$lucene-query}"</h2>
{$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
<ol>
{$pageResult}
</ol>
<hr></hr>
<p></p>
Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
</body>
</html>