view software/eXist/webapp/mpdl/text/all.xql @ 7:5589d865af7a

Erstellung XQL/XSL Applikation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 08 Feb 2011 15:16:46 +0100
parents
children 1ec29fdd0db8
line wrap: on
line source

xquery version "1.0";

module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text"; 

import module namespace functx = "http://www.functx.com" at "../util/functx.xql";

declare namespace text = "http://exist-db.org/xquery/text";
declare namespace util = "http://exist-db.org/xquery/util";
declare namespace local = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/local";

declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
declare namespace dcterms="http://purl.org/dc/terms";

(:~
 : Returns a deep copy of $element in which every element carries an
 : additional "xmlNodeId" attribute holding the value of util:node-id()
 : for the corresponding source element.
 :
 : Existing attributes are kept; non-element children (text, comments,
 : processing instructions) are passed through unchanged.
 :
 : @param $element the element to annotate
 : @return the annotated copy of $element
 :)
declare function mpdl-text:insertNodeIdAttribute($element as element()) {
  let $idAttribute := attribute xmlNodeId { util:node-id($element) }
  let $annotatedChildren :=
    for $kid in $element/node()
    return
      typeswitch ($kid)
        case element() return mpdl-text:insertNodeIdAttribute($kid)
        default return $kid
  return
    element { node-name($element) } { $element/@*, $idAttribute, $annotatedChildren }
};

(:~
 : Convenience entry point: annotates $element and its descendant elements
 : with "xmlNodeId" attributes, using "1" as the id of $element itself.
 :
 : @param $element the element to annotate
 : @return the result of the two-argument variant called with id "1"
 :)
declare function mpdl-text:insertMyNodeIdAttribute($element as element()) {
  let $rootNodeId := "1"
  return mpdl-text:insertMyNodeIdAttribute($element, $rootNodeId)
};
(:~
 : Returns a deep copy of $element in which every element carries an
 : "xmlNodeId" attribute built from its position path.
 :
 : $element itself receives $myNodeId. Each child element receives the
 : parent's id, a dot, and the child's position among ALL child nodes of
 : the parent (text and other non-element nodes are counted as well).
 : Non-element children are passed through unchanged.
 :
 : @param $element  the element to annotate
 : @param $myNodeId the id to assign to $element
 : @return the annotated copy of $element
 :)
declare function mpdl-text:insertMyNodeIdAttribute($element as element(), $myNodeId as xs:string) {
  let $annotatedChildren :=
    for $kid at $index in $element/node()
    return
      typeswitch ($kid)
        case element() return mpdl-text:insertMyNodeIdAttribute($kid, concat($myNodeId, ".", $index))
        default return $kid
  return
    element { node-name($element) } { $element/@*, attribute xmlNodeId { $myNodeId }, $annotatedChildren }
};

(:~
 : Inserts the content of each external object into $fragment, one object
 : after the other, and returns the resulting fragment.
 :
 : For every object the following is read:
 :   @xmlNodeId - selects the target among the descendants of $fragment
 :                whose own @xmlNodeId is equal to it
 :   @before    - "true" places the content before the target, anything
 :                else (or a missing attribute) places it after
 :   @charPos   - when present and non-empty, converted with number() and
 :                handed on as character position; otherwise -1
 :   content    - the child element(s) named "content" that are inserted
 :
 : The actual insertion is delegated to the five-argument mpdl-text:insert.
 : The fragment produced for one object is the input for the next object.
 :
 : @param $fragment        the fragment to insert into
 : @param $externalObjects the objects describing what to insert and where
 : @return $fragment unchanged when $externalObjects is empty, otherwise the
 :         fragment after all insertions
 :)
declare function mpdl-text:insert($fragment as element(), $externalObjects as element()*) {
  (: Nothing left to insert: return right away instead of running (and then
     discarding) one more full-tree copy. :)
  if (empty($externalObjects))
  then $fragment
  else
    let $firstObject := $externalObjects[1]
    let $xmlNodeId := $firstObject/@xmlNodeId
    let $posNode := $fragment//*[@xmlNodeId = $xmlNodeId]
    let $boolBefore := $firstObject/@before = "true"
    let $charPosStr := $firstObject/@charPos
    (: A missing attribute makes the general comparison false as well, so no
       separate empty() test is needed. :)
    let $charPos :=
      if ($charPosStr != "")
      then number($charPosStr)
      else -1
    let $newNode := $firstObject/content
    let $insertedFragment := mpdl-text:insert($fragment, $posNode, $boolBefore, $charPos, $newNode)
    return mpdl-text:insert($insertedFragment, subsequence($externalObjects, 2))
};

(:~
 : Recursively copies $element and splices $newNode in at the target $node.
 :
 : While walking the tree, the element that IS the target node is handled
 : as follows:
 :   $charPos >= 0            - the target and $newNode are serialized and
 :                              passed to mpdltext:insertAtCharPos together
 :                              with $charPos; the returned string is parsed
 :                              with util:parse and replaces the target
 :   $charPos = -1, $before   - $newNode followed by a copy of the target
 :   $charPos = -1, otherwise - a copy of the target followed by $newNode
 : Every other element is copied with its attributes, its child elements are
 : processed recursively, and non-element children are passed through.
 :
 : @param $element the element currently being copied
 : @param $node    the target node the insertion is relative to
 : @param $before  effective boolean value true inserts before the target
 : @param $charPos character position, or -1 for insertion before/after
 : @param $newNode the node(s) to insert
 : @return the copied element, together with $newNode when $element is the
 :         target
 :)
declare function mpdl-text:insert($element as element(), $node, $before, $charPos, $newNode) {
  (: The target must be matched by node identity. A general comparison
     ("=") compares atomized string values, so any ancestor or sibling with
     the same text as the target would be treated as the target too. :)
  let $isTarget := exists($node[. is $element])
  return
    if ($isTarget and $charPos >= 0)
    then
      util:parse(mpdltext:insertAtCharPos(util:serialize($element, ()), util:serialize($newNode, ()), $charPos))
    else
      let $copy :=
        element {node-name($element)}
          {$element/@*,
           for $child in $element/node()
              return if ($child instance of element())
                then mpdl-text:insert($child, $node, $before, $charPos, $newNode)
                else $child
          }
      return
        if ($isTarget and $charPos = -1)
        then
          if ($before)
          then ($newNode, $copy)
          else ($copy, $newNode)
        else
          (: not the target, or a $charPos that is neither -1 nor >= 0 :)
          $copy
};

(:~
 : Returns one page of index terms for $document as a <result> element.
 :
 : The terms are fetched with text:index-terms (at most 10000) from
 : $document/echo:echo/echo:text when $mpdlCollectionName is 'echo', and from
 : $document/archimedes/text otherwise. Each term of the requested page is
 : enriched with the values returned by
 : mpdltext:get-lex-entry-keys-by-form-name and
 : mpdltext:get-lemmasstr-by-form-name.
 :
 : When $language is "zh" the terms are fetched a second time through the
 : local:termEntriesInBig5 callback and the same page of that result is
 : returned in <big5-hits>.
 :
 : Result layout: <size> (number of terms fetched), <page-size>, <pages>,
 : <pn>, <hits>, <big5-hits>.
 :
 : @param $mpdlCollectionName 'echo' or 'archimedes'
 : @param $language           language passed to the lexicon lookups
 : @param $document           document whose text index is queried
 : @param $indexTermsStartStr start string passed to text:index-terms
 : @param $pn                 1-based page number
 : @param $pageSize           number of terms per page
 : @return the <result> element
 :)
declare function mpdl-text:indexTerms($mpdlCollectionName, $language, $document, $indexTermsStartStr, $pn as xs:int, $pageSize as xs:int) as node()* {
  let $index := 
    if ($mpdlCollectionName = 'echo') 
    then $document/echo:echo/echo:text 
    else $document/archimedes/text
  (: position of the first term on page $pn :)
  let $from := ($pn * $pageSize) - $pageSize + 1
  let $maxTo := 10000
  let $callback := util:function(QName("http://www.mpiwg-berlin.mpg.de/ns/mpdl/local", "local:termEntries"), 2)
  let $indexResult := text:index-terms($index, $indexTermsStartStr, $callback, $maxTo)
  let $count := count($indexResult)
  (: Ceiling division. "$count idiv $pageSize + 1" reported an extra, empty
     page whenever $count was an exact multiple of $pageSize. :)
  let $pages := ($count + $pageSize - 1) idiv $pageSize
  let $withPolluxKeys := 'true'
  (: Cut out the requested page first so that the lexicon lookups below run
     only for the terms that are actually returned. :)
  let $resultEntries := 
    for $entry in subsequence($indexResult, $from, $pageSize)
      let $term := $entry/term
      let $lexEntryKeys := 
        if ($withPolluxKeys = 'true')
        then mpdltext:get-lex-entry-keys-by-form-name($language, $term)
        else ''
      let $lemmasWithOR := mpdltext:get-lemmasstr-by-form-name($language, $term)
    return 
      <entry>
        <term>{$entry/term}</term>
        <pollux-keys>{$lexEntryKeys}</pollux-keys>
        <lemmas-with-or>{$lemmasWithOR}</lemmas-with-or>
        <frequency>{$entry/frequency}</frequency>  
        <documents>{$entry/documents}</documents>  
        <position>{$entry/position}</position>  
        <rank>{$entry/rank}</rank>  
      </entry>
  let $resultEntriesBig5 := 
    if ($language = "zh")
    then 
      let $callbackBig5 := util:function(QName("http://www.mpiwg-berlin.mpg.de/ns/mpdl/local", "local:termEntriesInBig5"), 2)
      let $indexResultBig5 := text:index-terms($index, $indexTermsStartStr, $callbackBig5, $maxTo)
      return subsequence($indexResultBig5, $from, $pageSize)
    else ()
  return
      <result>
        <size>{$count}</size>
        <page-size>{$pageSize}</page-size>
        <pages>{$pages}</pages>
        <pn>{$pn}</pn>
        <hits>{$resultEntries}</hits>
        <big5-hits>{$resultEntriesBig5}</big5-hits>
      </result>
};

(:~
 : Builds an <entry> element from a term and the integers supplied with it.
 : Registered in this module as callback for text:index-terms.
 :
 : @param $term the index term
 : @param $data integers whose items 1 to 4 are written to <frequency>,
 :              <documents>, <position> and <rank>, in that order
 : @return the <entry> element
 :)
declare function local:termEntries($term as xs:string, $data as xs:int+) {
  element entry {
    element term { $term },
    element frequency { $data[1] },
    element documents { $data[2] },
    element position { $data[3] },
    element rank { $data[4] }
  }
};

(:~
 : Builds an <entry-big5-encoded> element whose <term> child holds the result
 : of mpdltext:encode-big5 for $term. Registered in this module as callback
 : for text:index-terms.
 :
 : @param $term the index term
 : @param $data not used here; part of the callback signature
 : @return the <entry-big5-encoded> element
 :)
declare function local:termEntriesInBig5($term as xs:string, $data as xs:int+) {
  element entry-big5-encoded {
    element term { mpdltext:encode-big5($term) }
  }
};

(:~
 : Returns one page of table-of-contents or figure entries of $document as a
 : <result> element.
 :
 : Entries collected:
 :   echo / toc           - echo:div elements of type 'section' or 'chapter'
 :   echo / figures       - echo:figure elements
 :   archimedes / figures - figure elements
 :   anything else        - none
 :
 : Each entry becomes a <toc-entry> with
 :   <page>    - number of pb (echo:pb for echo) elements up to and including
 :               the one closest before the entry
 :   <level>   - number(@level) for 'toc', 1 otherwise
 :   <content> - the joined echo:head values for echo/toc; for figures the
 :               text "Figure" followed, for echo, by caption, description
 :               and variables when present
 : The entries are passed through mpdltext:generate-toc-levels before the
 : requested page is cut out.
 :
 : Result layout: <size> (number of entries collected), <page-size>, <pages>,
 : <pn>, <hits>.
 :
 : @param $docBase   'echo' or 'archimedes'
 : @param $queryType 'toc' or 'figures'
 : @param $document  the document to read from
 : @param $pn        1-based page number
 : @param $pageSize  number of entries per page
 : @return the <result> element
 :)
declare function mpdl-text:get-toc($docBase, $queryType, $document, $pn as xs:int, $pageSize as xs:int) {
  (: position of the first entry on page $pn :)
  let $from := ($pn * $pageSize) - $pageSize + 1
  let $tocEntriesAll := 
    if ($docBase = 'echo' and $queryType = 'toc')
    then $document//echo:div[@type = 'section' or @type = 'chapter']
    else if ($docBase = 'echo' and $queryType = 'figures')
    then $document//echo:figure
    else if ($docBase = 'archimedes' and $queryType = 'figures') 
    then $document//figure
    else ()
  let $tocEntriesAllTmp := 
    for $entry in $tocEntriesAll
      let $pb := 
        if ($docBase = 'echo')
        then $entry/preceding::echo:pb[1]
        else $entry/preceding::pb[1]
      let $pageNum := 
        if ($docBase = 'echo')
        then count($pb/preceding::echo:pb) + 1
        else count($pb/preceding::pb) + 1
      let $level := 
        if ($queryType = 'toc')
        then number($entry/@level)
        else 1
      let $isEchoFigure := $docBase = 'echo' and $queryType = 'figures'
      let $figureCaption := 
        if ($isEchoFigure and exists($entry/echo:caption))
        then concat(': ', string-join($entry/echo:caption/text(), ' '))
        else ''
      let $figureDescription := 
        if ($isEchoFigure and exists($entry/echo:description))
        then concat(': ', string-join($entry/echo:description/text(), ' '))
        else ''
      let $figureVariables := 
        if ($isEchoFigure and exists($entry/echo:variables))
        then concat(' (Variables: ', string-join($entry/echo:variables/text(), ' '), ')')
        else ''
      (: all three parts are '' unless this is an echo figure :)
      let $figureAllDesc := concat($figureCaption, $figureDescription, $figureVariables)
      let $content := 
        if ($docBase = 'echo' and $queryType = 'toc')
        then string-join($entry/echo:head, ' ')
        else if ($queryType = 'figures')
        then concat('Figure', $figureAllDesc)
        else ()
    return
      <toc-entry> 
        <page>{$pageNum}</page>
        <level>{$level}</level>
        <content>{$content}</content>
      </toc-entry>
  let $tocEntriesAllTmpWithLevels := mpdltext:generate-toc-levels($tocEntriesAllTmp)/toc-entries/toc-entry
  let $tocEntries := subsequence($tocEntriesAllTmpWithLevels, $from, $pageSize)
  let $size := count($tocEntriesAll)
  (: Ceiling division. "$size idiv $pageSize + 1" reported an extra, empty
     page whenever $size was an exact multiple of $pageSize. :)
  let $pages := ($size + $pageSize - 1) idiv $pageSize
  return
      <result>
        <size>{$size}</size>
        <page-size>{$pageSize}</page-size>
        <pages>{$pages}</pages>
        <pn>{$pn}</pn>
        <hits>{$tocEntries}</hits>
      </result>  
};

(:~
 : Determines the archive path of the image directory that belongs to a
 : document.
 :
 : $mpdlDocUri is taken apart at its slashes: the first path segment is the
 : docbase, and the part of the fourth segment before the first "." is the
 : document name. The document is loaded from
 : /db/mpdl/documents/standard + $mpdlDocUri. Its image directory is the
 : "echodir" metadata value when present; otherwise
 : /permanent/archimedes/<document name> for archimedes, or
 : /permanent/library/<id> for echo, where <id> is the part of the
 : dcterms:identifier between "ECHO:" and the next ".".
 :
 : The index.meta of that directory is then requested through the Texter
 : servlet after mpdldoc:check-uri (called with 2000) has confirmed the URL.
 :
 : @param $mpdlDocUri document URI of the form /docbase/language/name.ext
 : @return the string value of resource/archive-path from index.meta, or
 :         the literal "XXXXDigilibNotAvailableXXXX" when the URL check fails
 :)
declare function mpdl-text:getEchoArchivePath($mpdlDocUri) {
  let $afterDocbaseSlash := substring-after($mpdlDocUri, "/")
  let $docbase := substring-before($afterDocbaseSlash, "/")
  let $documentName := substring-before(substring-after(substring-after($afterDocbaseSlash, "/"), "/"), ".")
  let $fullDocumentUri := concat('/db/mpdl/documents/standard', $mpdlDocUri)
  let $document := doc($fullDocumentUri)
  (: Unknown docbase: use the empty sequence, not ''. A string cannot be the
     context of the path steps below and would raise a type error. :)
  let $metadata := 
    if ($docbase = 'archimedes')
    then $document/archimedes/info
    else if ($docbase = 'echo')
    then $document/echo:echo/echo:metadata
    else ()
  let $documentIdentifier :=
    if ($docbase = 'archimedes')
    then $metadata/locator
    else $metadata/dcterms:identifier
  let $echoDocIdentifier := 
    if ($documentIdentifier != '') 
    then substring-before(substring-after($documentIdentifier, "ECHO:"), ".")
    else ''
  let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter"
  let $echoImageDir := 
    if ($docbase = 'archimedes')
    then string($metadata/echodir)
    else if ($docbase = 'echo')
    then string($metadata/echo:echodir)
    else ''
  let $imagesDocDirectory :=
    if ($echoImageDir != '')
    then $echoImageDir
    else if ($docbase = 'archimedes')
    then concat("/permanent/archimedes/", $documentName)
    else if ($docbase = 'echo')
    then concat("/permanent/library/", $echoDocIdentifier)
    else ''
  let $imagesDocDirectoryIndexMetaUrl  := concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta")
  let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, 2000)
  return
    if ($digilibAvailable)
    then string(doc($imagesDocDirectoryIndexMetaUrl)/resource/archive-path)
    else "XXXXDigilibNotAvailableXXXX"
};

(:~
 : Applies a stylesheet to $inputXml.
 :
 : The stylesheet is loaded with doc($xslFileName) and run through
 : transform:transform without stylesheet parameters.
 :
 : @param $inputXml    the input to transform
 : @param $xslFileName URI of the stylesheet document
 : @return the result of transform:transform
 :)
declare function mpdl-text:transform($inputXml, $xslFileName) {
  transform:transform($inputXml, doc($xslFileName), ())
};

(:~
 : Transforms $inputXml with the given stylesheet and passes the result to
 : mpdldoc:html2pdf together with four corner strings.
 :
 : Corner strings handed to mpdldoc:html2pdf:
 :   top left     - $title
 :   top right    - "Page " + $pageNumber + a literal that embeds
 :                  counter(page) between double quotes
 :   bottom left  - "View mode: " + $mode
 :   bottom right - the current date and time as D.M.YYYY H:MM
 :
 : @param $language    passed through to mpdldoc:html2pdf
 : @param $inputXml    the input to transform
 : @param $xslFileName URI of the stylesheet document
 : @param $title       text for the top left corner
 : @param $pageNumber  page number shown in the top right corner
 : @param $mode        view mode shown in the bottom left corner
 : @return the result of mpdldoc:html2pdf
 :)
declare function mpdl-text:html2pdf($language, $inputXml, $xslFileName, $title, $pageNumber, $mode) {
  let $pageXslDoc := doc($xslFileName)
  let $htmlPageFragment := transform:transform($inputXml, $pageXslDoc, ())
  let $topLeftStr := $title
  let $topRightStr := concat("Page ", $pageNumber, " (&quot; counter(page) &quot;)")
  let $bottomLeftStr := concat("View mode: ", $mode)
  let $currentTime := current-dateTime()
  let $year := year-from-dateTime($currentTime)
  let $month := month-from-dateTime($currentTime)
  let $day := day-from-dateTime($currentTime)
  let $hours := hours-from-dateTime($currentTime)
  let $minutes := minutes-from-dateTime($currentTime)
  (: Zero-pad the minutes; without it 15:06 was printed as "15:6". :)
  let $minutesStr :=
    if ($minutes < 10)
    then concat("0", $minutes)
    else string($minutes)
  let $bottomRightStr := concat("", $day, ".", $month, ".", $year, " ", $hours, ":", $minutesStr)
  return mpdldoc:html2pdf($htmlPageFragment, $language, $topLeftStr, $topRightStr, $bottomLeftStr, $bottomRightStr)
};