Mercurial > hg > mpdl-group
diff software/eXist/webapp/mpdl/text/all.xql @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children | 1ec29fdd0db8 |
line wrap: on
line diff
xquery version "1.0";

(:~
 : Text utilities of the MPDL web application: node-id annotation of XML
 : fragments, insertion of external objects into fragments, paged index-term
 : and table-of-contents listings, ECHO archive path lookup, XSL transformation
 : and PDF rendering.
 :
 : Origin: software/eXist/webapp/mpdl/text/all.xql (changeset 7:5589d865af7a).
 :
 : NOTE(review): the prefixes mpdltext, mpdldoc and transform are used below but
 : are not declared in this module; presumably they are bound by the eXist
 : configuration (built-in / Java extension modules) - confirm.
 : NOTE(review): the local:* callbacks live in a namespace other than the module
 : namespace; they are looked up by QName in mpdl-text:indexTerms, so they are
 : kept exactly where they are.
 :)
module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text";

import module namespace functx = "http://www.functx.com" at "../util/functx.xql";

declare namespace text = "http://exist-db.org/xquery/text";
declare namespace util = "http://exist-db.org/xquery/util";
declare namespace local = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/local";

declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
declare namespace dcterms="http://purl.org/dc/terms";

(:~
 : Returns a deep copy of $element in which every element carries an
 : "xmlNodeId" attribute holding util:node-id() of the original element.
 :
 : @param $element the element to copy
 : @return the annotated copy
 :)
declare function mpdl-text:insertNodeIdAttribute($element as element()) {
  element { node-name($element) } {
    (: leave out an already present xmlNodeId so the attribute is never constructed twice :)
    ($element/@* except $element/@xmlNodeId),
    attribute { "xmlNodeId" } { util:node-id($element) },
    for $child in $element/node()
    return
      if ($child instance of element())
      then mpdl-text:insertNodeIdAttribute($child)
      else $child
  }
};

(:~
 : Returns a deep copy of $element annotated with hierarchical "xmlNodeId"
 : attributes, the root element getting the id "1".
 :
 : @param $element the element to copy
 : @return the annotated copy
 :)
declare function mpdl-text:insertMyNodeIdAttribute($element as element()) {
  mpdl-text:insertMyNodeIdAttribute($element, "1")
};

(:~
 : Returns a deep copy of $element annotated with hierarchical "xmlNodeId"
 : attributes. $element gets $myNodeId; each child element gets the id of its
 : parent followed by "." and its position among ALL child nodes of the parent
 : (text and other non-element nodes are counted as well).
 :
 : @param $element  the element to copy
 : @param $myNodeId the id assigned to $element
 : @return the annotated copy
 :)
declare function mpdl-text:insertMyNodeIdAttribute($element as element(), $myNodeId as xs:string) {
  element { node-name($element) } {
    (: leave out an already present xmlNodeId so the attribute is never constructed twice :)
    ($element/@* except $element/@xmlNodeId),
    attribute { "xmlNodeId" } { $myNodeId },
    for $child at $pos in $element/node()
    return
      if ($child instance of element())
      then mpdl-text:insertMyNodeIdAttribute($child, concat($myNodeId, ".", $pos))
      else $child
  }
};

(:~
 : Inserts the content of all $externalObjects into $fragment, one object after
 : the other. Each object is read as follows:
 :   @xmlNodeId - selects the descendant of the fragment carrying the same
 :                xmlNodeId attribute (the insertion point)
 :   @before    - "true" inserts before the insertion point, anything else after
 :   @charPos   - if present and not empty, a character position handed to the
 :                five-argument mpdl-text:insert; otherwise -1 is used
 :   content    - the child element(s) that are inserted
 :
 : @param $fragment        the fragment to insert into
 : @param $externalObjects the objects to insert; may be empty
 : @return the fragment with all objects inserted; $fragment itself if there
 :         are no objects
 :)
declare function mpdl-text:insert($fragment as element(), $externalObjects as element()*) {
  if (empty($externalObjects))
  then $fragment
  else
    let $firstObject := $externalObjects[1]
    let $posNode := $fragment//*[@xmlNodeId = $firstObject/@xmlNodeId]
    let $boolBefore := ($firstObject/@before = "true")
    let $charPosStr := $firstObject/@charPos
    let $charPos :=
      (: a missing attribute also fails the comparison, so -1 covers "absent" and "empty" :)
      if ($charPosStr != "")
      then number($charPosStr)
      else -1
    let $newNode := $firstObject/content
    let $insertedFragment := mpdl-text:insert($fragment, $posNode, $boolBefore, $charPos, $newNode)
    (: the remaining objects are inserted into the already extended fragment :)
    return mpdl-text:insert($insertedFragment, subsequence($externalObjects, 2))
};

(:~
 : Helper of the five-argument mpdl-text:insert: rebuilds $element with its
 : attributes and passes every child element through mpdl-text:insert; all
 : other child nodes are copied unchanged.
 :)
declare function mpdl-text:copyWithInsert($element as element(), $node, $before, $charPos, $newNode) {
  element { node-name($element) } {
    $element/@*,
    for $child in $element/node()
    return
      if ($child instance of element())
      then mpdl-text:insert($child, $node, $before, $charPos, $newNode)
      else $child
  }
};

(:~
 : Copies $element recursively and inserts $newNode at the insertion point
 : $node:
 :   - $charPos = -1 and $before     : $newNode is emitted before the copy of $node
 :   - $charPos = -1 and not $before : $newNode is emitted after the copy of $node
 :   - $charPos >= 0                 : $node and $newNode are serialized, combined by
 :                                     mpdltext:insertAtCharPos and parsed again
 : Every other element is only copied.
 :
 : The insertion point is recognised by node identity ("is"). Comparing with
 : "=" would compare string values, so every element with the same text as the
 : insertion point (for instance every empty element) would match as well.
 :
 : @param $element the element currently being copied
 : @param $node    the insertion point, a node of the tree $element belongs to
 : @param $before  true to insert before the insertion point, false to insert after it
 : @param $charPos character position, or -1 for insertion before/after the node
 : @param $newNode the node(s) to insert
 : @return the copy of $element, for the insertion point together with $newNode
 :)
declare function mpdl-text:insert($element as element(), $node, $before, $charPos, $newNode) {
  let $isTarget := some $candidate in $node satisfies $candidate is $element
  return
    if (not($isTarget))
    then mpdl-text:copyWithInsert($element, $node, $before, $charPos, $newNode)
    else if ($charPos >= 0)
    then util:parse(mpdltext:insertAtCharPos(util:serialize($element, ()), util:serialize($newNode, ()), $charPos))
    else if ($charPos = -1 and $before)
    then ($newNode, mpdl-text:copyWithInsert($element, $node, $before, $charPos, $newNode))
    else if ($charPos = -1)
    then (mpdl-text:copyWithInsert($element, $node, $before, $charPos, $newNode), $newNode)
    else mpdl-text:copyWithInsert($element, $node, $before, $charPos, $newNode)
};

(:~
 : Lists one page of the index terms of a document.
 :
 : The terms are read with text:index-terms from the text part of the document
 : (echo:echo/echo:text for the collection 'echo', archimedes/text otherwise),
 : at most 10000 of them. For the terms on the requested page the lexicon keys
 : and lemmas are looked up via the mpdltext functions. For the language "zh"
 : the same page is additionally delivered with terms encoded by
 : mpdltext:encode-big5.
 :
 : @param $mpdlCollectionName 'archimedes' or 'echo'
 : @param $language           language code handed to the lexicon lookups
 : @param $document           the document node
 : @param $indexTermsStartStr start string handed to text:index-terms
 : @param $pn                 page number, starting with 1
 : @param $pageSize           number of terms per page
 : @return a result element with size, page-size, pages, pn, hits and big5-hits
 :)
declare function mpdl-text:indexTerms($mpdlCollectionName, $language, $document, $indexTermsStartStr, $pn as xs:int, $pageSize as xs:int) as node()* {
  let $index :=
    if ($mpdlCollectionName = 'echo')
    then $document/echo:echo/echo:text
    else $document/archimedes/text
  (: position of the first term of the requested page :)
  let $from := ($pn * $pageSize) - $pageSize + 1
  let $maxTo := 10000
  let $callback := util:function(QName("http://www.mpiwg-berlin.mpg.de/ns/mpdl/local", "local:termEntries"), 2)
  let $indexResult := text:index-terms($index, $indexTermsStartStr, $callback, $maxTo)
  let $count := count($indexResult)
  let $pages :=
    (: round up; an exact multiple of the page size must not add an empty page :)
    if ($count = 0)
    then 0
    else ($count + $pageSize - 1) idiv $pageSize
  let $withPolluxKeys := 'true'
  let $resultEntries :=
    (: only the requested page is processed, so the lexicon lookups are not run for all terms :)
    for $entry in subsequence($indexResult, $from, $pageSize)
    let $term := $entry/term
    let $lexEntryKeys :=
      if ($withPolluxKeys = 'true')
      then mpdltext:get-lex-entry-keys-by-form-name($language, $term)
      else ''
    let $lemmasWithOR := mpdltext:get-lemmasstr-by-form-name($language, $term)
    return
      <entry>
        <term>{$term}</term>
        <pollux-keys>{$lexEntryKeys}</pollux-keys>
        <lemmas-with-or>{$lemmasWithOR}</lemmas-with-or>
        <frequency>{$entry/frequency}</frequency>
        <documents>{$entry/documents}</documents>
        <position>{$entry/position}</position>
        <rank>{$entry/rank}</rank>
      </entry>
  let $resultEntriesBig5 :=
    if ($language = "zh")
    then
      let $callbackBig5 := util:function(QName("http://www.mpiwg-berlin.mpg.de/ns/mpdl/local", "local:termEntriesInBig5"), 2)
      let $indexResultBig5 := text:index-terms($index, $indexTermsStartStr, $callbackBig5, $maxTo)
      return subsequence($indexResultBig5, $from, $pageSize)
    else ()
  return
    <result>
      <size>{$count}</size>
      <page-size>{$pageSize}</page-size>
      <pages>{$pages}</pages>
      <pn>{$pn}</pn>
      <hits>{$resultEntries}</hits>
      <big5-hits>{$resultEntriesBig5}</big5-hits>
    </result>
};

(:~
 : Callback for text:index-terms: wraps a term and the first four values of
 : $data into an entry element (frequency, documents, position, rank).
 :
 : @param $term the index term
 : @param $data the numbers delivered with the term
 : @return the entry element
 :)
declare function local:termEntries($term as xs:string, $data as xs:int+) {
  <entry>
    <term>{$term}</term>
    <frequency>{$data[1]}</frequency>
    <documents>{$data[2]}</documents>
    <position>{$data[3]}</position>
    <rank>{$data[4]}</rank>
  </entry>
};

(:~
 : Callback for text:index-terms: delivers the term encoded by
 : mpdltext:encode-big5; $data is not used.
 :
 : @param $term the index term
 : @param $data the numbers delivered with the term (ignored)
 : @return an entry-big5-encoded element containing the encoded term
 :)
declare function local:termEntriesInBig5($term as xs:string, $data as xs:int+) {
  <entry-big5-encoded>
    <term>{mpdltext:encode-big5($term)}</term>
  </entry-big5-encoded>
};

(:~
 : Lists one page of the table of contents or of the figures of a document.
 :
 : Supported combinations of $docBase / $queryType:
 :   'echo' / 'toc'           - echo:div elements of type 'section' or 'chapter'
 :   'echo' / 'figures'       - echo:figure elements
 :   'archimedes' / 'figures' - figure elements
 : Every other combination delivers no entries.
 :
 : The page of an entry is the number of page breaks (echo:pb resp. pb) up to
 : and including the nearest page break preceding the entry.
 :
 : @param $docBase   'echo' or 'archimedes'
 : @param $queryType 'toc' or 'figures'
 : @param $document  the document node
 : @param $pn        page number, starting with 1
 : @param $pageSize  number of entries per page
 : @return a result element with size, page-size, pages, pn and hits
 :)
declare function mpdl-text:get-toc($docBase, $queryType, $document, $pn as xs:int, $pageSize as xs:int) {
  let $isEcho := ($docBase = 'echo')
  let $isEchoFigures := ($isEcho and $queryType = 'figures')
  (: position of the first entry of the requested page :)
  let $from := ($pn * $pageSize) - $pageSize + 1
  let $tocEntriesAll :=
    if ($isEcho and $queryType = 'toc')
    then $document//echo:div[@type = 'section' or @type = 'chapter']
    else if ($isEchoFigures)
    then $document//echo:figure
    else if ($docBase = 'archimedes' and $queryType = 'figures')
    then $document//figure
    else ()
  let $tocEntriesAllTmp :=
    for $entry in $tocEntriesAll
    let $pb :=
      if ($isEcho)
      then $entry/preceding::echo:pb[1]
      else $entry/preceding::pb[1]
    let $pageNum :=
      if ($isEcho)
      then count($pb/preceding::echo:pb) + 1
      else count($pb/preceding::pb) + 1
    let $level :=
      if ($queryType = 'toc')
      then number($entry/@level)
      else 1
    let $figureCaption :=
      if ($isEchoFigures and exists($entry/echo:caption))
      then concat(': ', string-join($entry/echo:caption/text(), ' '))
      else ''
    let $figureDescription :=
      if ($isEchoFigures and exists($entry/echo:description))
      then concat(': ', string-join($entry/echo:description/text(), ' '))
      else ''
    let $figureVariables :=
      if ($isEchoFigures and exists($entry/echo:variables))
      then concat(' (Variables: ', string-join($entry/echo:variables/text(), ' '), ')')
      else ''
    let $figureAllDesc :=
      if ($isEchoFigures)
      then concat($figureCaption, $figureDescription, $figureVariables)
      else ''
    let $content :=
      if ($isEcho and $queryType = 'toc')
      then string-join($entry/echo:head, ' ')
      else if ($queryType = 'figures')
      then concat('Figure', $figureAllDesc)
      else ()
    return
      <toc-entry>
        <page>{$pageNum}</page>
        <level>{$level}</level>
        <content>{$content}</content>
      </toc-entry>
  let $tocEntriesAllTmpWithLevels := mpdltext:generate-toc-levels($tocEntriesAllTmp)/toc-entries/toc-entry
  let $tocEntries := subsequence($tocEntriesAllTmpWithLevels, $from, $pageSize)
  let $size := count($tocEntriesAll)
  let $pages :=
    (: round up; an exact multiple of the page size must not add an empty page :)
    if ($size = 0)
    then 0
    else ($size + $pageSize - 1) idiv $pageSize
  return
    <result>
      <size>{$size}</size>
      <page-size>{$pageSize}</page-size>
      <pages>{$pages}</pages>
      <pn>{$pn}</pn>
      <hits>{$tocEntries}</hits>
    </result>
};

(:~
 : Determines the archive path of the images belonging to a document.
 :
 : $mpdlDocUri has the form "/docbase/language/name.ext". The document is
 : loaded from /db/mpdl/documents/standard; its image directory is taken from
 : the echodir metadata or, if that is empty, derived from the document name
 : (archimedes) resp. the ECHO identifier (echo). The index.meta of that
 : directory is then requested from the Texter servlet.
 :
 : @param $mpdlDocUri document URI relative to the standard document collection
 : @return the string value of resource/archive-path of the index.meta, or the
 :         marker "XXXXDigilibNotAvailableXXXX" if mpdldoc:check-uri reports
 :         the index.meta URL as not available
 :)
declare function mpdl-text:getEchoArchivePath($mpdlDocUri) {
  let $afterFirstSlash := substring-after($mpdlDocUri, "/")
  let $docbase := substring-before($afterFirstSlash, "/")
  let $afterThirdSlash := substring-after(substring-after($afterFirstSlash, "/"), "/")
  let $documentName := substring-before($afterThirdSlash, ".")
  let $document := doc(concat('/db/mpdl/documents/standard', $mpdlDocUri))
  let $metadata :=
    if ($docbase = 'archimedes')
    then $document/archimedes/info
    else if ($docbase = 'echo')
    then $document/echo:echo/echo:metadata
    (: empty sequence instead of '': a path step on a string raises a type error :)
    else ()
  let $documentIdentifier :=
    if ($docbase = 'archimedes')
    then $metadata/locator
    else $metadata/dcterms:identifier
  let $echoDocIdentifier :=
    if ($documentIdentifier != '')
    then substring-before(substring-after($documentIdentifier, "ECHO:"), ".")
    else ''
  let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter"
  let $echoImageDir :=
    if ($docbase = 'archimedes')
    then string($metadata/echodir)
    else if ($docbase = 'echo')
    then string($metadata/echo:echodir)
    else ''
  let $imagesDocDirectory :=
    if ($echoImageDir != '')
    then $echoImageDir
    else if ($docbase = 'archimedes')
    then concat("/permanent/archimedes/", $documentName)
    else if ($docbase = 'echo')
    then concat("/permanent/library/", $echoDocIdentifier)
    else ''
  let $imagesDocDirectoryIndexMetaUrl := concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta")
  (: NOTE(review): 2000 is presumably a timeout for the availability check - confirm :)
  let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, 2000)
  return
    if ($digilibAvailable)
    then string(doc($imagesDocDirectoryIndexMetaUrl)/resource/archive-path)
    else "XXXXDigilibNotAvailableXXXX"
};

(:~
 : Transforms $inputXml with the stylesheet stored at $xslFileName.
 :
 : @param $inputXml    the node to transform
 : @param $xslFileName URI of the stylesheet document
 : @return the result of transform:transform (called without parameters)
 :)
declare function mpdl-text:transform($inputXml, $xslFileName) {
  transform:transform($inputXml, doc($xslFileName), ())
};

(:~
 : Transforms $inputXml with the stylesheet at $xslFileName and renders the
 : result as PDF via mpdldoc:html2pdf. The page margins show:
 :   top left     - $title
 :   top right    - "Page", $pageNumber and a counter(page) expression
 :   bottom left  - the view mode
 :   bottom right - the current date and time as day.month.year hours:minutes
 :
 : @param $language    language handed to mpdldoc:html2pdf
 : @param $inputXml    the node to transform
 : @param $xslFileName URI of the stylesheet document
 : @param $title       text for the top left margin
 : @param $pageNumber  page number shown in the top right margin
 : @param $mode        view mode shown in the bottom left margin
 : @return the result of mpdldoc:html2pdf
 :)
declare function mpdl-text:html2pdf($language, $inputXml, $xslFileName, $title, $pageNumber, $mode) {
  let $htmlPageFragment := mpdl-text:transform($inputXml, $xslFileName)
  let $topLeftStr := $title
  (: The quotes around counter(page) belong to the string and therefore have to
     be written as &quot; inside the literal.
     NOTE(review): presumably the string ends up inside a quoted CSS content
     value, where counter(page) is evaluated by the PDF renderer - confirm. :)
  let $topRightStr := concat("Page ", $pageNumber, " (&quot; counter(page) &quot;)")
  let $bottomLeftStr := concat("View mode: ", $mode)
  let $currentTime := current-dateTime()
  let $minutes := minutes-from-dateTime($currentTime)
  let $minutesStr :=
    (: two digits, so that 15:06 is not shown as 15:6 :)
    if ($minutes < 10)
    then concat("0", $minutes)
    else string($minutes)
  let $bottomRightStr :=
    concat(
      day-from-dateTime($currentTime), ".",
      month-from-dateTime($currentTime), ".",
      year-from-dateTime($currentTime), " ",
      hours-from-dateTime($currentTime), ":",
      $minutesStr
    )
  return mpdldoc:html2pdf($htmlPageFragment, $language, $topLeftStr, $topRightStr, $bottomLeftStr, $bottomRightStr)
};