view software/eXist/webapp/mpdl/_stuff/futureDev/fulltext-query-result-in-one-doc.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children
line wrap: on
line source

xquery version "1.0";

declare namespace request="http://exist-db.org/xquery/request";
declare namespace string-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/string-util"; 
declare namespace time-util = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/time-util"; 

(:~
 : Converts a duration into a whole number of milliseconds.
 :
 : All day, hour, minute and second components of the duration are taken
 : into account, so durations of one hour or more are no longer truncated
 : to their minutes/seconds part.
 :
 : @param $t a duration value accepted by the *-from-duration functions
 :           (e.g. the difference of two xs:dateTime / xs:time values)
 : @return the length of $t in milliseconds, rounded to the nearest integer value
 :)
declare function time-util:duration-as-ms($t) {
  let $hours := days-from-duration($t) * 24 + hours-from-duration($t)
  let $minutes := $hours * 60 + minutes-from-duration($t)
  let $seconds := $minutes * 60 + seconds-from-duration($t)
  return round($seconds * 1000)
};

(:~
 : Splits a string into its distinct words.
 :
 : The splitting itself is delegated to mpdltext:getWords, which is called
 : with the delimiter pattern "[,;.\s]+" and the flag "i"; duplicates are
 : then removed with fn:distinct-values.
 : NOTE(review): mpdltext is not declared in this file - presumably an
 : extension module registered in the eXist configuration; confirm.
 :
 : @param $strInput the text to split (may be the empty sequence)
 : @return the distinct words returned by mpdltext:getWords
 :)
declare function string-util:getWords($strInput as xs:string?) as xs:string* {
  (: alternative without the extension module: fn:tokenize($strInput, "[,;.\s]+", "i") :)
  fn:distinct-values(
    mpdltext:getWords($strInput, "[,;.\s]+", "i")
  )
};

(:~
 : Appends ", " to every element of a sequence except the last one.
 :
 : An element that is not the last one and has at least one text node child
 : is wrapped in a <w> element together with a trailing ", ". The last
 : element, and any element without text node children, is returned as is.
 :
 : @param $elems the elements to separate
 : @return one item per input element, in the same order
 :)
declare function string-util:putCommaBetween($elems as element()*) as element()* {
  let $lastIndex := count($elems)
  for $item at $index in $elems
  return
    if ($index >= $lastIndex or empty($item/text()))
    then $item
    else <w>{$item}, </w>
};

(:~
 : Turns a list of words into dictionary links, sorted by the lower-cased word.
 :
 : Each non-empty word becomes an <a> element whose href points at the
 : Archimedes dictionary CGI and whose text is the lower-cased word. The word
 : and the language are percent-encoded before being placed into the URL so
 : that reserved or non-ASCII characters cannot corrupt the query string.
 : An empty word yields an empty <a/> placeholder.
 :
 : @param $words    the words to link
 : @param $language language code passed as the "lang" URL parameter; the
 :                  empty sequence is accepted and produces an empty value
 : @return one <a> element per input word, ordered by lower-cased word
 :)
declare function string-util:getSortedLinkedWords($words as xs:string*, $language as xs:string?) as element()* {
  let $dictURLPart1 := "http://archimedes.mpiwg-berlin.mpg.de/cgi-bin/toc/dict?step=table;word="
  (: encode-for-uri returns "" for the empty sequence, so a missing language is harmless :)
  let $langParam := fn:encode-for-uri($language)
  for $word in $words
  let $lowerCaseWord := fn:lower-case($word)
  order by $lowerCaseWord
  return
    if (string-length($lowerCaseWord) > 0) then
      <a href="{$dictURLPart1}{fn:encode-for-uri($lowerCaseWord)};lang={$langParam};pro=echo">{$lowerCaseWord}</a>
    else
      <a></a>
};

(:~
 : Builds the comma separated, sorted list of dictionary links for a text.
 :
 : The text is split with string-util:getWords, the words are linked and
 : sorted with string-util:getSortedLinkedWords, and the resulting elements
 : are separated with string-util:putCommaBetween.
 :
 : @param $strInput the text whose words are to be linked
 : @param $language language code handed on to getSortedLinkedWords
 : @return the elements produced by putCommaBetween
 :)
declare function string-util:toSortedLinkedWords($strInput as xs:string?, $language as xs:string?) as element()* {
  string-util:putCommaBetween(
    string-util:getSortedLinkedWords(
      string-util:getWords($strInput),
      $language
    )
  )
};

(:~
 : Returns a small, hard-coded document node with two <product> elements.
 :
 : Each product carries a "dept" attribute, a <number> and a <name> with a
 : "language" attribute. The function is not called anywhere in this file.
 :
 : @return a document node with two product children
 :)
declare function string-util:getDummyDocument() as node() {
  document {
    (
      element product {
        attribute dept { "ACC" },
        element number { 563 },
        element name {
          attribute language { "en" },
          "Floppy Sun Hat"
        }
      },
      element product {
        attribute dept { "BCC" },
        element number { 564 },
        element name {
          attribute language { "en" },
          "Floppy SBun Iat"
        }
      }
    )
  }
};

(:
  Main query: full-text search within a single Archimedes document.

  Request parameters read here:
    lang          "0" (default) searches /db/archimedes, anything else /db/arch/{language}
    language      sub-collection used when lang is not "0" (default "fr")
    document      document name without the ".xml" suffix
    ft-query      Lucene query used when lang is "0"
    ft-lang-query Lucene query used otherwise
    pn            1-based number of the result page to show (default 1)

  The result is an HTML page listing the matching <s> elements of the
  requested page together with their page number, their position on that
  page and dictionary links for their words.

  NOTE(review): "document" and "language" are concatenated into the doc()
  path without validation - confirm that this is acceptable for the deployment.
:)
let $pageSize := 10
let $currentTimeBegin := util:system-time()
let $lang := request:get-parameter("lang", "0")
let $language := request:get-parameter("language", "fr")
let $document := request:get-parameter("document", "alber_archi_003_en_1755")

let $tempArchimedesDocPath :=
  if ($lang = "0")
  then "/db/archimedes"
  else concat("/db/arch/", $language)
let $archimedesDocPath := concat($tempArchimedesDocPath, "/", $document, ".xml")
let $archDoc := doc($archimedesDocPath)

let $lucene-query := 
  if ($lang = "0")
  then request:get-parameter("ft-query", "Illuſtriſsimi")
  else request:get-parameter("ft-lang-query", "Illuſtriſsimi")

let $resultElems :=
  for $s at $pos in $archDoc//s[ft:query(., $lucene-query)]
    let $documentName := util:document-name($s)
    let $collectionName := util:collection-name($s)
    let $fullDocName := concat($collectionName, "/", $documentName)
    let $docRoot := doc($fullDocName)
    let $sArchInfo := $s/root()/archimedes/info
    let $author := string($sArchInfo/author/text())
    let $docLanguage := string($sArchInfo/lang/text())
    let $dictLinks := string-util:toSortedLinkedWords($s, $docLanguage)
    (: page breaks of this document that precede the sentence; their count is the page number :)
    let $precedingPbs := $docRoot//pb[. << $s]
    let $pnOfS := count($precedingPbs)
    (: The page break that opens the sentence's page is the last preceding one.
       "$docRoot//pb[$pnOfS]" must not be used for this: the predicate would be
       applied per parent and select the n-th pb child of every parent instead
       of the n-th pb of the document. :)
    let $pagePb := $precedingPbs[last()]
    (: sentences between that page break and $s, plus $s itself; 1 if there is no page break :)
    let $posOfS := count($pagePb/following::s[. << $s]) + 1
    let $resultElem := 
      <elem>
        <pos>{$pos}</pos>
        <full-doc>{$fullDocName}</full-doc>
        <name>{$documentName}</name>
        <pn>{$pnOfS}</pn>
        <pos-of-s>{$posOfS}</pos-of-s>
        {$s}
        <links>{$dictLinks}</links>
      </elem>
  order by $author, $pos
  return $resultElem

let $countElems := count($resultElems)
(: ceiling division: an exact multiple of the page size must not yield an extra empty page;
   at least one page is always reported so that the pager is never empty :)
let $countPages := max((1, ($countElems + $pageSize - 1) idiv $pageSize))

(: a missing, repeated or non-numeric "pn" falls back to page 1 instead of raising an error,
   and the page number is clamped to the range of existing pages :)
let $pnParam := request:get-parameter("pn", "1")
let $requestedPn :=
  if ($pnParam castable as xs:integer)
  then xs:integer($pnParam)
  else 1
let $pn := max((1, min(($requestedPn, $countPages))))
let $positionFrom :=
  if ($countElems = 0)
  then 0
  else ($pn - 1) * $pageSize + 1
let $positionTo := min(($pn * $pageSize, $countElems))

(: The pager links repeat every parameter needed to reproduce the query, including the
   document, and percent-encode the values so that spaces, "&" etc. survive the round trip. :)
let $documentParam := concat("?document=", encode-for-uri($document))
let $pagesURLs :=
  if ($lang = "0")
  then concat($documentParam, "&amp;ft-query=", encode-for-uri($lucene-query))
  else concat($documentParam, "&amp;ft-lang-query=", encode-for-uri($lucene-query),
              "&amp;lang=1&amp;language=", encode-for-uri($language))

let $countPagesURLs :=
  for $i in (1 to $countPages)
  let $pageURL := 
    if ($i = $pn) 
    then ($i, " ")
    else (<a href="{$pagesURLs}&amp;pn={$i}">{$i}</a>, " ")
  return $pageURL

let $pageResult := 
  for $elem at $pos in $resultElems
    let $hitPos := xs:integer($elem/pos)
    let $pnOfS := xs:integer($elem/pn)
    let $posOfS := xs:integer($elem/pos-of-s)
    let $docName := $elem/name
    let $linkPageQuery := (<a href="page-query-result.xql?document={$docName}&amp;pn={$pnOfS}&amp;mode=xml">Page {$pnOfS}</a>)
    let $hitLI :=
        <li value="{$hitPos}">
          {$linkPageQuery}, Sentence: {$posOfS}<br></br>
          <b>Sentence: </b>{$elem/s/text()}<br></br>
          <b>Dictionary links: </b>{$elem/links}<br></br>
        </li>
  where $pos >= $positionFrom and $pos <= $positionTo
  return $hitLI 

let $currentTimeEnd := util:system-time()
let $neededTime := time-util:duration-as-ms($currentTimeEnd - $currentTimeBegin)


return
<html>
 <head>
  <title>Result for your query: "{$lucene-query}"</title>
 </head>
<body>
<h2>Result of query: "{$lucene-query}"</h2>
{$positionFrom}-{$positionTo} of {$countElems} results. Page: {$countPagesURLs}
<ol>
{$pageResult}
</ol>
<hr></hr>
<p></p>
Elapsed time: {$neededTime} ms, Back to <a href="query.xql">query page</a>, see the <a href="fulltext-query-result.xql?_source=yes">XQuery source</a> of this page
</body>
</html>