view software/eXist/webapp/mpdl/page-query-result.xql @ 13:469d927b9ca7

diverse Fehlerbehebungen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 19 Apr 2011 16:51:59 +0200
parents d6f528ad5d96
children e99964f390e4
line wrap: on
line source

xquery version "1.0";

import module namespace mpdl-time = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/util/time" at "util/time.xql";
import module namespace functx = "http://www.functx.com" at "util/functx.xql";
import module namespace mpdl-lucene = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/lucene/search" at "lucene/search.xql";
import module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text" at "text/all.xql";

declare namespace xlink="http://www.w3.org/1999/xlink";
declare namespace request = "http://exist-db.org/xquery/request";
declare namespace transform = "http://exist-db.org/xquery/transform";
declare namespace util = "http://exist-db.org/xquery/util";

declare namespace dcterms="http://purl.org/dc/terms";
declare namespace xhtml="http://www.w3.org/1999/xhtml";
declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
declare namespace TEI="http://www.tei-c.org/ns/1.0";

(: ---- Request parameters ---- :)
(: Document path exactly as sent in the "document" parameter. :)
let $mpdlDocUri := request:get-parameter("document", "")
let $queryType := request:get-parameter("query-type", "")
let $query := request:get-parameter("query", "")
(: View mode; "image" is used when the parameter is absent. :)
let $mode := request:get-parameter("mode", "image")
(: Page number and sentence number; both default to -1 when not supplied. :)
let $reqPN := number(request:get-parameter("pn", "-1"))
let $reqSN := number(request:get-parameter("sn", "-1"))
let $reqPF := request:get-parameter("pf", "")
let $xPointer := request:get-parameter("xpointer", "")
(: Requested page of the query result; an empty value or "0" selects page 1. :)
let $reqQueryResultPN := request:get-parameter("query-result-pn", "")
let $queryResultPN := 
  if ($reqQueryResultPN = ('', '0'))
  then 1
  else number($reqQueryResultPN)
(: All characterNormalization values joined by commas; the shorthand "regPlusNorm" expands to "reg,norm". :)
let $joinedCharNorm := string-join(request:get-parameter("characterNormalization", ""), ',')
let $charNorm := 
  if ($joinedCharNorm = "regPlusNorm")
  then "reg,norm"
  else $joinedCharNorm
let $reqExport := request:get-parameter("export", "")
let $options := string(request:get-parameter("options", ""))

let $presentationPath := "/db/mpdl/presentation"
(: The document URI is split at its slashes, e.g. /archimedes/la/yourDoc.xml   :)
(: gives docbase "archimedes", language "la" and document name "yourDoc".      :)
let $uriAfterFirstSlash := substring-after($mpdlDocUri, "/")
let $uriAfterDocbase := substring-after($uriAfterFirstSlash, "/")
let $docbase := substring-before($uriAfterFirstSlash, "/")
let $language := substring-before($uriAfterDocbase, "/")
let $documentName := substring-before(substring-after($uriAfterDocbase, "/"), ".")
(: Plain fulltext queries read the "standard" collection; every other query type reads "morph". :)
let $fullDocumentUri := 
  concat(
    (if ($queryType = ('fulltext', 'ftIndex'))
     then '/db/mpdl/documents/standard'
     else '/db/mpdl/documents/morph'),
    $mpdlDocUri)
(: Start of the time measurement that is reported in the result. :)
let $currentTimeBegin := util:system-time()
let $documentAvailable := doc-available($fullDocumentUri)
let $document := doc($fullDocumentUri)
(: Metadata container of the document; its location depends on the docbase. :)
let $metadata := 
  if ($docbase = 'archimedes')
  then $document/archimedes/info
  else if ($docbase = 'echo')
  then $document/echo:echo/echo:metadata
  else if ($docbase = 'tei')
  then $document/TEI:TEI/TEI:teiHeader
  else ''

(: All page break elements of the document; echo and tei use namespaced pb elements, :)
(: archimedes and any other docbase use the pb element without a namespace.          :)
let $pageBreaks := 
  if ($docbase = 'echo')
  then $document//echo:pb
  else if ($docbase = 'tei')
  then $document//TEI:pb
  else $document//pb
let $countPagesTemp := count($pageBreaks)
(: A document without any page break is reported as having one page. :)
let $countPages := max(($countPagesTemp, 1))

(: Inline execution of an xpath/xquery expression does not work inside a module, so it is done here. :)
(: NOTE(security): $query is taken from the request and evaluated as code against the document;     :)
(: access to these query types should be restricted if callers are not trusted.                      :)
let $xQueryPageSize := 1000
(: "and" binds tighter than "or" in XQuery, so the query types are parenthesised:   :)
(: without the parentheses an xpath request with an empty query was still evaluated. :)
let $xQueryResultEval := 
  if (($queryType = 'xpath' or $queryType = 'xquery') and $query != "")
  then util:eval-inline($document, $query)
  else ()
(: Position of the first entry of the requested result page. :)
let $xQueryFrom := ($queryResultPN * $xQueryPageSize) - $xQueryPageSize + 1
(: The entries of the requested result page (at most $xQueryPageSize items). :)
let $xQueryResultEntries := subsequence($xQueryResultEval, $xQueryFrom, $xQueryPageSize)
let $xQuerySize := count($xQueryResultEval)
(: Number of result pages, rounded up; 0 for an empty result. The former            :)
(: "size idiv pageSize + 1" reported one page too many whenever the size was an     :)
(: exact multiple of the page size.                                                  :)
let $xQueryPages := ($xQuerySize + $xQueryPageSize - 1) idiv $xQueryPageSize
let $xQueryResult := 
      <result>
        <size>{$xQuerySize}</size>
        <page-size>{$xQueryPageSize}</page-size>
        <pages>{$xQueryPages}</pages>
        <pn>{$queryResultPN}</pn>
        <hits>{$xQueryResultEntries}</hits>
      </result>  

(: Selects the query result by query type:                                           :)
(:   fulltext, fulltextMorph, fulltextMorphLemma -> mpdl-lucene:search (page size 10) :)
(:   ftIndex, ftIndexMorph                       -> mpdl-text:indexTerms (page size 100) :)
(:   xpath, xquery                               -> the inline result built above     :)
(:   toc, figures                                -> mpdl-text:get-toc (page size 100) :)
(: An empty query yields an empty result element; anything else yields ().           :)
(: The xpath/xquery test is parenthesised because "and" binds tighter than "or":     :)
(: without the parentheses an xpath request with an empty query took that branch.    :)
let $queryResult := 
  if (($queryType = 'fulltext' or $queryType = 'fulltextMorph' or $queryType = 'fulltextMorphLemma') and $query != "")
  then mpdl-lucene:search($docbase, $language, $document, $queryType, $query, $queryResultPN, 10)
  else if (($queryType = 'ftIndex' or $queryType = 'ftIndexMorph') and $query != "")
  then mpdl-text:indexTerms($docbase, $language, $document, $query, $queryResultPN, 100)
  else if (($queryType = 'xpath' or $queryType = 'xquery') and $query != "")
  then $xQueryResult
  else if ($queryType = 'toc' or $queryType = 'figures')
  then mpdl-text:get-toc($docbase, $queryType, $document, $queryResultPN, 100)
  else if ($query = "")
  then 
      <result>
        <size>0</size>
        <pages>0</pages>
        <pn>0</pn>
        <hits/>
      </result>
  else ()

(: NOTE(review): the result elements built inline in this file have <result> as their root,  :)
(: so the result/hits/hit step only matches when $queryResult is a node that has a <result>  :)
(: child - confirm what mpdl-lucene:search returns.                                           :)
let $firstHit := $queryResult/result/hits/hit[1]
let $countHits := count($queryResult/result/hits/hit)
(: In the fulltext query types, jump to the first hit when hits exist and no page was requested. :)
let $jumpToFirstHit := 
  ($queryType = ('fulltext', 'fulltextMorph', 'fulltextMorphLemma')) and $countHits > 0 and $reqPN <= 0
(: Page number: page of the first hit, else the requested page; -1 (not supplied) becomes page 1. :)
let $pn := 
  if ($jumpToFirstHit)
  then number($firstHit/pn)
  else if ($reqPN = -1)
  then 1
  else $reqPN
(: Sentence number: sentence position of the first hit unless a sentence was requested. :)
let $sn := 
  if ($jumpToFirstHit and $reqSN < 0)
  then number($firstHit/pos-of-s)
  else $reqSN

(: Status code of the request; values of 10 or more are errors:        :)
(:   10 = document not available                                        :)
(:   11 = page number outside the range of existing page breaks         :)
(:   12 = unknown view mode                                             :)
(:    1 = no page break found: the document is treated as a single page :)
(:    0 = ok                                                            :)
let $validModes := ("text", "textPollux", "gis", "image", "xml", "pureXml")
let $errorCode := 
  if (not($documentAvailable))
  then 10
  else if ($countPagesTemp = 0)
  then 1
  else if ($pn > $countPagesTemp or $pn <= 0)
  then 11
  else if (not($mode = $validModes))
  then 12
  else 0

(: Start node of the requested page: the pn-th page break, or, for a document        :)
(: without page breaks (error code 1), the first node returned by mpdl-lucene:getText. :)
let $pb1 := 
  if ($errorCode = 1)
  then (mpdl-lucene:getText($docbase, $document))[1]
  else if ($errorCode = 0)
  then subsequence($pageBreaks, $pn, 1)
  else ()
(: End node of the requested page: the following page break, or the second getText node. :)
let $pb2 := 
  if ($errorCode = 1)
  then (mpdl-lucene:getText($docbase, $document))[2]
  else if ($errorCode = 0)
  then subsequence($pageBreaks, $pn + 1, 1)
  else ()
(: Header and original page number are read from attributes of the start node. :)
let $pageNumberOrig := string($pb1/@o)
let $pageHeader := string($pb1/@rhead)

(: Base URLs of the external image services. :)
let $echoURLZogilib := "http://echo.mpiwg-berlin.mpg.de/zogilib"
let $nausikaaURLScaler := "http://nausikaa2.rz-berlin.mpg.de/digitallibrary/servlet/Scaler"
let $nausikaaURLDlInfo := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dlInfo-xml.jsp"
let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter"

(: Identifier element of the document: locator for archimedes, idno for tei, :)
(: dcterms:identifier for echo and for any other docbase.                     :)
let $documentIdentifier :=
  if ($docbase = 'archimedes')
  then $metadata/locator
  else if ($docbase = 'tei')
  then $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:idno
  else $metadata/dcterms:identifier
(: The part of the identifier between "ECHO:" and the first following dot. :)
let $echoDocIdentifier := 
  if ($documentIdentifier != '')
  then substring-before(substring-after($documentIdentifier, "ECHO:"), ".")
  else ''
(: Image directory named explicitly in the metadata (archimedes and echo only). :)
let $echoImageDir := 
  if ($docbase = 'archimedes')
  then string($metadata/echodir)
  else if ($docbase = 'echo')
  then string($metadata/echo:echodir)
  else ''
(: Directory of the document images: the explicit echodir when present, :)
(: otherwise a default that depends on the docbase.                     :)
let $imagesDocDirectory :=
  if ($echoImageDir != '')
  then $echoImageDir
  else if ($docbase = 'archimedes')
  then concat("/permanent/archimedes/", $documentName)
  else if ($docbase = 'echo')
  then concat("/permanent/library/", $echoDocIdentifier)
  else if ($docbase = 'tei')
  then $documentIdentifier
  else ''
(: URL of the index.meta file of the image directory, requested through the Texter servlet. :)
(: It is only built for the modes image, text, textPollux and gis; otherwise it is ().       :)
let $imagesDocDirectoryIndexMetaUrl := 
  if ($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis")
  then concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta")
  else ()
(: NOTE(review): the mpdldoc prefix is not declared in this file; presumably it is a module :)
(: registered in the eXist configuration - verify. check-uri is also called when the URL    :)
(: above is the empty sequence (modes xml and pureXml).                                      :)
let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, ())
(: The index.meta document is fetched only in the four modes above and only when the check succeeded. :)
let $imagesDocDirectoryIndexMeta := 
  if (($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") and $digilibAvailable)
  then doc($imagesDocDirectoryIndexMetaUrl)
  else ()
(: Sub directories for page images and figures as named in index.meta. :)
let $pageImageDirectory := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/image)
let $figuresImageDirectoryTemp := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/figures)
(: Without an explicit figures entry, the figures directory is derived from the page image :)
(: directory: the text before "pageimg" followed by "figures".                              :)
let $figuresImageDirectory := 
  if ($figuresImageDirectoryTemp != '')
  then $figuresImageDirectoryTemp
  else concat(substring-before($pageImageDirectory, "pageimg"), "figures")
(: Image file of the page: taken from the @file (echo) or @facs (tei) attribute of the page start node. :)
let $pageImageFileNameWithoutExtension := 
  if ($docbase = 'echo')
  then concat("/", string($pb1/@file))
  else if ($docbase = 'tei')
  then concat("/", string($pb1/@facs))
  else ''
(: The "pf" request parameter, when given, overrides the computed image file name. :)
let $imageFileName :=
  if ($reqPF = '')
  then concat($imagesDocDirectory, "/", $pageImageDirectory, $pageImageFileNameWithoutExtension)
  else $reqPF
(: Elements holding the image URLs for the zogilib viewer and for the Scaler servlet. :)
let $imageEcho := <image-echo>{$echoURLZogilib}?fn={$imageFileName}&amp;pn={$pn}</image-echo>
let $imageScaler := <image-scaler>{$nausikaaURLScaler}?fn={$imageFileName}&amp;pn={$pn}</image-scaler>

(: In image mode the dlInfo service is asked about the image file. :)
let $imageFileNameUrl := concat($nausikaaURLDlInfo, "?fn=", $imageFileName)
let $testImageResult := 
  if ($mode = 'image' and $digilibAvailable)
  then doc($imageFileNameUrl)
  else ()
let $testImageResultParamImgFn := string($testImageResult//parameter[@name='img.fn']/@value)
(: The strings 'true' / 'false' (not booleans): 'false' only when dlInfo reported no img.fn :)
(: value and no "pf" parameter was given.                                                   :)
let $imageIsAvailable := 
  if ($testImageResultParamImgFn = '' and $reqPF = '')
  then 'false'
  else 'true'

(: Position, counted over the figure elements of the docbase, of the first figure that follows :)
(: the page start node: the number of figures preceding that figure plus one. The value is the :)
(: empty sequence for an unknown docbase.                                                       :)
let $positionOfFirstFigureAfterPB1 := 
  if ($docbase = ('archimedes', 'echo', 'tei'))
  then
    (
      let $figuresBeforeFirstFigure :=
        if ($docbase = 'archimedes')
        then $pb1/following::figure[1]/preceding::figure
        else if ($docbase = 'echo')
        then $pb1/following::echo:figure[1]/preceding::echo:figure
        else $pb1/following::TEI:figure[1]/preceding::TEI:figure
      return count($figuresBeforeFirstFigure) + 1
    )
  else ()

(: No page fragment is produced in image mode or when an error (code 10 or more) occurred. :)
let $skipPageFragment := ($mode = "image" or $errorCode > 9)
let $isTextMode := ($mode = ("text", "textPollux"))
let $withoutLBs := contains($options, "withoutLBs")

(: The content between the page start node and the page end node. :)
let $pageFragmentTmp := 
  if (not($skipPageFragment) and $mode = ("text", "textPollux", "gis", "xml", "pureXml"))
  then util:get-fragment-between($pb1, $pb2, true())
  else ()
(: Hyphen handling in the text modes:                                                        :)
(:  - with line breaks: a soft hyphen (U+00AD, written here as a character reference because :)
(:    the character itself is invisible) directly before a line break becomes a normal hyphen :)
(:  - with options=withoutLBs: a normal hyphen directly before a line break is deleted        :)
let $pageFragment :=
  if ($isTextMode and not($withoutLBs) and contains($pageFragmentTmp, "&#xAD;<lb"))
  then replace($pageFragmentTmp, "&#xAD;<lb", "-<lb")
  else if ($isTextMode and $withoutLBs and contains($pageFragmentTmp, "-<lb"))
  then replace($pageFragmentTmp, "-<lb", "<lb")
  else $pageFragmentTmp
(: Character normalization: the text modes and gis default to 'reg,norm' when no normalization :)
(: was requested; xml and pureXml are only normalized when one was requested.                   :)
let $pageFragmentNormalized := 
  if ($skipPageFragment)
  then ()
  else if ($mode = ("text", "textPollux", "gis"))
  then mpdltext:normalizeChars((if ($charNorm = "") then 'reg,norm' else $charNorm), $language, $pageFragment)
  else if ($mode = ("xml", "pureXml"))
  then
    (if ($charNorm = "")
     then $pageFragment
     else mpdltext:normalizeChars($charNorm, $language, $pageFragment))
  else ()
(: In textPollux mode the normalized fragment is additionally passed to mpdltext:dictionarize. :)
let $retPageFragment := 
  if ($skipPageFragment)
  then ()
  else if ($mode = ("text", "gis", "xml", "pureXml"))
  then $pageFragmentNormalized
  else if ($mode = "textPollux")
  then mpdltext:dictionarize($pageFragmentNormalized, $language)
  else ()
(: Parse the fragment string into an xml document. :)
let $returnPageFragmentTmp := util:parse($retPageFragment)

(: Read the external objects attached to this page; the request is an <object> element passed as a string. :)
(: NOTE(review): $mpdlDocUri is inserted into that string without escaping - confirm that this is safe.    :)
let $externalElementsTmpTmp := 
  mpdltext:externalObject(
    "read",
    "element",
    concat(
      "<object uid=&quot;joe&quot; documentId=&quot;", $mpdlDocUri,
      "&quot; xpointer=&quot;", "#xpointer(id(", "'page", $pn, "'",
      "))&quot;></object>"))
(: An empty string answer means there is nothing to parse. :)
let $externalElementsTmp := 
  if ($externalElementsTmpTmp = "")
  then ()
  else util:parse($externalElementsTmpTmp)
let $externalElements := $externalElementsTmp/result/object
let $containsExternalElements := exists($externalElements)
(: Input for insertExternalElements.xsl: external objects, xpointer and page fragment. :)
let $returnPageFragmentWithExtObjects :=
  <result>
    <externalElements>{$externalElements}</externalElements>
    <xpointer>{$xPointer}</xpointer>
    <fragment>{$returnPageFragmentTmp}</fragment>
  </result>
(: The stylesheet is only applied when node ids were requested, external elements exist :)
(: or an xpointer was given; otherwise the parsed fragment is used unchanged.            :)
let $applyInsertExternalElements := 
  (contains($options, "withXmlNodeId") or $containsExternalElements or $xPointer != '')
let $returnPageFragment := 
  if ($applyInsertExternalElements)
  then mpdl-text:transform($returnPageFragmentWithExtObjects, concat($presentationPath, "/insertExternalElements.xsl"))
  else $returnPageFragmentTmp

(: For each anchor of a given type in the page fragment, look up the first element of the :)
(: document whose xlink:label equals the xlink:href of the anchor.                         :)
let $pageFigureAnchors := $returnPageFragment//anchor[@type = 'figure']
let $pageFigures :=
  for $anchor in $pageFigureAnchors
  return ($document//echo:figure[@xlink:label = string($anchor/@xlink:href)])[1]
let $pageHandwrittenAnchors := $returnPageFragment//anchor[@type = 'handwritten']
let $pageHandwritten :=
  for $anchor in $pageHandwrittenAnchors
  return ($document//echo:handwritten[@xlink:label = string($anchor/@xlink:href)])[1]
let $pageTableAnchors := $returnPageFragment//anchor[@type = 'table']
let $pageTables :=
  for $anchor in $pageTableAnchors
  return ($document//xhtml:table[@xlink:label = string($anchor/@xlink:href)])[1]
(: Notes are looked up through anchors for the echo docbase only; for every other docbase :)
(: the note elements of the page fragment itself are used.                                  :)
let $pageNoteAnchors := $returnPageFragment//anchor[@type = 'note']
let $pageNotes :=
  if ($docbase = "echo")
  then
    for $anchor in $pageNoteAnchors
    return ($document//echo:note[@xlink:label = string($anchor/@xlink:href)])[1]
  else
    $returnPageFragment//note

(: Metadata handling: only metadata of the selected document is scanned   :)
(: Each field is fetched by one call of mpdl-lucene:getElementsByAttr with the metadata node, :)
(: the docbase and the field name; the identifier is the one determined earlier.              :)
let $identifier := $documentIdentifier
let $authors := mpdl-lucene:getElementsByAttr($metadata, $docbase, "author")
let $titles := mpdl-lucene:getElementsByAttr($metadata, $docbase, "title")
let $places := mpdl-lucene:getElementsByAttr($metadata, $docbase, "place")
let $date := mpdl-lucene:getElementsByAttr($metadata, $docbase, "date")
let $rights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "rights")
let $accessRights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "accessRights")
let $licenses := mpdl-lucene:getElementsByAttr($metadata, $docbase, "license")
let $file := mpdl-lucene:getElementsByAttr($metadata, $docbase, "file")
let $translator := mpdl-lucene:getElementsByAttr($metadata, $docbase, "translator")
let $version := mpdl-lucene:getElementsByAttr($metadata, $docbase, "version")

(: End of the time measurement started before the document was opened; the difference is :)
(: converted by mpdl-time:duration-as-ms and reported in the performance element.         :)
let $currentTimeEnd := util:system-time()
let $neededTime := mpdl-time:duration-as-ms($currentTimeEnd - $currentTimeBegin)

(: The xml result of the request.                                                            :)
(: For error codes below 10 it is a result element with four parts: document-description     :)
(: (metadata and page count), page (mode, numbers, image information, page content and the   :)
(: objects anchored in it), query (type, expression, query result) and performance.          :)
(: For error codes 10, 11 and 12 it is an error element with number and description; any     :)
(: other code yields an "undefined error" element.                                           :)
(: NOTE(review): xml-url is built from $documentName, not from the full document uri that    :)
(: the "document" parameter of this script expects - confirm that this is intended.          :)
let $xmlResult := 
  if ($errorCode < 10)
  then 
    <result>
      <document-description>
        <uri>{$mpdlDocUri}</uri>
        <collection-name>{$docbase}</collection-name>
        <document-name>{$documentName}</document-name>
        <language>{$language}</language>
        <authors>{$authors}</authors>
        <titles>{$titles}</titles>
        <places>{$places}</places>
        <date>{$date}</date>
        <identifier>{$identifier}</identifier>
        <rights>{$rights}</rights>
        <accessRights>{$accessRights}</accessRights>
        <licenses>{$licenses}</licenses>
        <file>{$file}</file>
        <translator>{$translator}</translator>
        <version>{$version}</version>
        <count-pages>{$countPages}</count-pages>
      </document-description>
      <page>
        <mode>{$mode}</mode>
        <number>{$pn}</number>
        <sentence-number>{$sn}</sentence-number>
        <header>{$pageHeader}</header>
        <number-orig>{$pageNumberOrig}</number-orig>
        <digilib-available>{$digilibAvailable}</digilib-available>
        <image-available>{$imageIsAvailable}</image-available>
        <image-file-name>{$imageFileName}</image-file-name>
        {$imageEcho}
        {$imageScaler}
        <xml-url>?document={$documentName}&amp;pn={$pn}&amp;mode=xml</xml-url>
        <page-image-directory>{$imagesDocDirectory}/{$pageImageDirectory}</page-image-directory>
        <figures-image-directory>{$imagesDocDirectory}/{$figuresImageDirectory}</figures-image-directory>
        <firstFigurePosition>{$positionOfFirstFigureAfterPB1}</firstFigurePosition>
        <figures>{$pageFigures}</figures>
        <handwritten>{$pageHandwritten}</handwritten>
        <tables>{$pageTables}</tables>
        <notes>{$pageNotes}</notes>
        <content>{$returnPageFragment}</content>
        <character-normalization>{$charNorm}</character-normalization>
        <options>{$options}</options>
      </page>
      <query>
        <type>{$queryType}</type>
        <expression>{$query}</expression>
        {$queryResult}
      </query>
      <performance>{$neededTime}</performance>
    </result>
  else if ($errorCode = 10)
  then <error><number>{$errorCode}</number><description>Can't find fulltext document: {$mpdlDocUri} </description></error>
  else if ($errorCode = 11)
  then <error><number>{$errorCode}</number><description>No result: Page {$pn} not found</description></error>
  else if ($errorCode = 12)
  then <error><number>{$errorCode}</number><description>View mode {$mode} not available</description></error>
  else <error><number>{$errorCode}</number><description>undefined error: {$errorCode}</description></error>  

(: Modes that are presented as html pages. :)
let $htmlModes := ("text", "textPollux", "gis", "image", "xml")
(: Serialization: xhtml for errors and for the html modes, xml for everything else (e.g. pureXml). :)
let $declare := 
  util:declare-option(
    "exist:serialize",
    if ($errorCode > 9 or $mode = $htmlModes)
    then "method=xhtml media-type=text/html omit-xml-declaration=no indent=no encoding=utf-8"
    else "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")
(: Stylesheet: a pdf export takes precedence over the mode. :)
let $xslFileName := 
  if ($reqExport = "pdf")
  then "/pageFragmentHtml.xsl"
  else if ($mode = $htmlModes)
  then "/pageHtml.xsl"
  else "/pageXml.xsl"
let $xslFilePath := concat($presentationPath, $xslFileName)

(: Title line: authors, titles, places and date. :)
let $titleStr := concat(string-join($authors, ', '), ". ", string-join($titles, ', '), ". ", string-join($places, ', '), " ", $date, ".")
let $isPdfExport := ($errorCode < 10 and $reqExport = "pdf")
(: Errors are returned as the error xml wrapped in a div; otherwise the xml result is :)
(: converted to pdf or transformed with the selected stylesheet.                      :)
let $tmpResult :=
  if ($errorCode >= 10)
  then <div>{$xmlResult}</div>
  else if ($isPdfExport)
  then mpdl-text:html2pdf($language, $xmlResult, $xslFilePath, $titleStr, $pn, $mode) 
  else mpdl-text:transform($xmlResult, $xslFilePath) 
(: A pdf is streamed as a binary response under the name <document>-page<pn>.pdf. :)
let $result :=
  if ($isPdfExport)
  then response:stream-binary($tmpResult, "application/pdf", concat($documentName, "-page", $pn, ".pdf"))
  else $tmpResult
  
(: In pureXml mode a Content-Disposition header names the returned file:               :)
(:   xpath query  -> <document>-xpath-result--<query>--                                :)
(:   xquery query -> xquery-result                                                     :)
(:   otherwise    -> <document>-page<pn>                                               :)
(: The document name and the query come from the request, so carriage returns and line :)
(: feeds are replaced by a blank before they are written into the header; this keeps a :)
(: crafted parameter from adding header lines to the response.                         :)
let $setHeaderXmlFilename := 
  if ($mode = "pureXml" and $queryType = "xpath" and $query != "")
  then response:set-header('Content-Disposition', concat('filename=', replace($documentName, '[\r\n]+', ' '), '-xpath-result--', replace($query, '[\r\n]+', ' '), '--'))
  else if ($mode = "pureXml" and $queryType = "xquery" and $query != "")
  then response:set-header('Content-Disposition', concat('filename=', 'xquery-result'))
  else if ($mode = "pureXml") 
  then response:set-header('Content-Disposition', concat('filename=', replace($documentName, '[\r\n]+', ' '), '-page', $pn)) 
  else ()

return $result