Mercurial > hg > mpdl-group
view software/eXist/webapp/mpdl/interface/page-fragment.xql @ 13:469d927b9ca7
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 19 Apr 2011 16:51:59 +0200 |
parents | d6f528ad5d96 |
children | e99964f390e4 |
line wrap: on
line source
xquery version "1.0";

(:
   page-fragment.xql

   Delivers one page of a document as a result XML and runs it through a
   presentation stylesheet (or exports it as PDF).

   Request parameters read by this script:
     document               document uri, e.g. /archimedes/la/yourDoc.xml
     mode                   text | textPollux | gis | image | xml | pureXml (default: image)
     pn                     page number (default: first page)
     pf                     explicit page image file name (overrides the computed one)
     sn                     sentence number (passed through to the result)
     xpointer               xpointer (passed through to insertExternalElements.xsl)
     highlightQuery         query whose word forms are delivered for highlighting
     characterNormalization normalization mode(s); "regPlusNorm" means "reg,norm"
     export                 "pdf" streams the page as PDF
     options                free option string ("withoutLBs", "withXmlNodeId")

   Error codes: 10 = document not found, 11 = page not found,
   12 = view mode not available. Code 1 is not an error: it marks a document
   without any page break, which is treated as a single page.

   NOTE(review): the prefixes mpdltext, mpdldoc and response are used below but
   not declared in this prolog; presumably they are provided by modules
   registered in the eXist configuration. Verify before moving this script.
:)

import module namespace mpdl-time = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/util/time" at "../util/time.xql";
import module namespace functx = "http://www.functx.com" at "../util/functx.xql";
import module namespace mpdl-lucene = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/lucene/search" at "../lucene/search.xql";
import module namespace mpdl-text = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/text" at "../text/all.xql";

declare namespace xlink="http://www.w3.org/1999/xlink";
declare namespace request = "http://exist-db.org/xquery/request";
declare namespace transform = "http://exist-db.org/xquery/transform";
declare namespace util = "http://exist-db.org/xquery/util";
declare namespace dcterms="http://purl.org/dc/terms";
declare namespace xhtml="http://www.w3.org/1999/xhtml";
declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
declare namespace TEI="http://www.tei-c.org/ns/1.0";

(: ---------- request parameters ---------- :)
let $mpdlDocUri := request:get-parameter("document", "")
let $mode := request:get-parameter("mode", "image")
let $reqPN := number(request:get-parameter("pn", "-1"))
let $reqPF := request:get-parameter("pf", "")
let $reqSN := number(request:get-parameter("sn", "-1"))
let $xPointer := request:get-parameter("xpointer", "")
let $highlightQuery := request:get-parameter("highlightQuery", "")
let $regCharNorm := request:get-parameter("characterNormalization", "")
let $tmpCharNorm := string-join($regCharNorm, ',')
let $charNorm :=
    if ($tmpCharNorm = "regPlusNorm") then "reg,norm"
    else $tmpCharNorm
let $reqExport := request:get-parameter("export", "")
let $options := string(request:get-parameter("options", ""))
let $presentationPath := "/db/mpdl/presentation"

(: ---------- document location ----------
   Document name, language and docbase are derived from the path segments of
   mpdlDocUri, e.g. /archimedes/la/yourDoc.xml :)
let $documentName := substring-before(substring-after(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/"), ".")
let $language := substring-before(substring-after(substring-after($mpdlDocUri, "/"), "/"), "/")
let $docbase := substring-before(substring-after($mpdlDocUri, "/"), "/")
let $fullDocumentUri := concat('/db/mpdl/documents/morph', $mpdlDocUri)
(: start of the time measurement reported in the performance element :)
let $currentTimeBegin := util:system-time()
let $documentAvailable := doc-available($fullDocumentUri)
let $document := doc($fullDocumentUri)
let $metadata :=
    if ($docbase = 'archimedes') then $document/archimedes/info
    else if ($docbase = 'echo') then $document/echo:echo/echo:metadata
    else if ($docbase = 'tei') then $document/TEI:TEI/TEI:teiHeader
    else ''
let $pageBreaks :=
    if ($docbase = 'archimedes') then $document//pb
    else if ($docbase = 'echo') then $document//echo:pb
    else if ($docbase = 'tei') then $document//TEI:pb
    else $document//pb
let $countPagesTemp := count($pageBreaks)
(: a document without page breaks is reported as having one page :)
let $countPages :=
    if ($countPagesTemp > 0) then $countPagesTemp
    else 1

(: for performance reasons: deliver count of gis places and toc/figure entries :)
let $gisPlaces :=
    if ($docbase = 'echo') then $document//echo:place
    else ()
let $countGisPlaces := count($gisPlaces)
let $tocEntries :=
    if ($docbase = 'echo') then $document//echo:div[@type = 'section' or @type = 'chapter']
    else ()
let $figureEntries :=
    if ($docbase = 'echo') then $document//echo:figure
    else if ($docbase = 'archimedes') then $document//figure
    else ()
let $countTocEntries := count($tocEntries)
let $countFigureEntries := count($figureEntries)

(: ---------- page selection and validation ---------- :)
(: no pn requested: deliver the first page :)
let $pn :=
    if ($reqPN = -1) then 1
    else $reqPN
let $sn := $reqSN
(: 10 or more is an error.
   A non-numeric pn parameter becomes NaN through number(); NaN fails both
   range comparisons, so it is checked explicitly and reported as error 11
   instead of reaching the fragment extraction with empty page breaks. :)
let $errorCode :=
    if (not($documentAvailable)) then 10
    else if ($countPagesTemp != 0 and (string($pn) = "NaN" or $pn > $countPagesTemp or $pn <= 0)) then 11
    (: if no page break is found then the document should have exactly one page :)
    else if ($countPagesTemp = 0) then 1
    else if (not($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml" or $mode = "pureXml")) then 12
    else 0
(: pb1/pb2 delimit the requested page; without page breaks the first two
   items returned by mpdl-lucene:getText are used instead :)
let $pb1 :=
    if ($errorCode = 0) then subsequence($pageBreaks, $pn, 1)
    else if ($errorCode = 1) then subsequence(mpdl-lucene:getText($docbase, $document), 1, 1)
    else ()
let $pb2 :=
    if ($errorCode = 0) then subsequence($pageBreaks, $pn + 1, 1)
    else if ($errorCode = 1) then subsequence(mpdl-lucene:getText($docbase, $document), 2, 1)
    else ()
let $pageHeader := string($pb1/@rhead)
let $pageNumberOrig := string($pb1/@o)
let $pageNumberOrigNorm := string($pb1/@o-norm)
let $documentIdentifier :=
    if ($docbase = 'archimedes') then $metadata/locator
    else if ($docbase = 'echo') then $metadata/dcterms:identifier
    else if ($docbase = 'tei') then $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:idno
    else $metadata/dcterms:identifier
(: the part of the identifier between "ECHO:" and the first "." :)
let $echoDocIdentifier :=
    if ($documentIdentifier != '') then substring-before(substring-after($documentIdentifier, "ECHO:"), ".")
    else ''

(: ---------- page image lookup ---------- :)
let $echoURLZogilib := "http://echo.mpiwg-berlin.mpg.de/zogilib"
let $nausikaaURLScaler := "http://nausikaa2.rz-berlin.mpg.de/digitallibrary/servlet/Scaler"
let $nausikaaURLDlInfo := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/dlInfo-xml.jsp"
let $nausikaaURLTexter := "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter"
let $echoImageDir :=
    if ($docbase = 'archimedes') then string($metadata/echodir)
    else if ($docbase = 'echo') then string($metadata/echo:echodir)
    else ''
(: an echodir given in the metadata wins over the docbase specific default :)
let $imagesDocDirectory :=
    if ($echoImageDir != '') then $echoImageDir
    else if ($docbase = 'archimedes') then concat("/permanent/archimedes/", $documentName)
    else if ($docbase = 'echo') then concat("/permanent/library/", $echoDocIdentifier)
    else if ($docbase = 'tei') then $documentIdentifier
    else ''
let $imagesDocDirectoryIndexMetaUrl :=
    if ($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") then
        concat($nausikaaURLTexter, "?fn=", $imagesDocDirectory, "/index.meta")
    else ()
(: NOTE(review): the second argument looks like a timeout in ms; confirm against the mpdldoc module :)
let $digilibAvailable := mpdldoc:check-uri($imagesDocDirectoryIndexMetaUrl, 2000)
let $imagesDocDirectoryIndexMeta :=
    if (($mode = "image" or $mode = "text" or $mode = "textPollux" or $mode = "gis") and $digilibAvailable) then
        doc($imagesDocDirectoryIndexMetaUrl)
    else ()
let $pageImageDirectory := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/image)
let $figuresImageDirectoryTemp := string($imagesDocDirectoryIndexMeta/resource/meta/texttool/figures)
(: fallback: sibling "figures" directory of the "pageimg" directory :)
let $figuresImageDirectory :=
    if ($figuresImageDirectoryTemp != '') then $figuresImageDirectoryTemp
    else concat(substring-before($pageImageDirectory, "pageimg"), "figures")
let $pageImageFileNameWithoutExtension :=
    if ($docbase = 'echo') then concat("/", string($pb1/@file))
    else if ($docbase = 'tei') then concat("/", string($pb1/@facs))
    else ''
let $imageFileName :=
    if ($reqPF = '') then concat($imagesDocDirectory, "/", $pageImageDirectory, $pageImageFileNameWithoutExtension)
    else $reqPF
(: the ampersand has to be written as an entity reference inside element content :)
let $imageEcho := <image-echo>{$echoURLZogilib}?fn={$imageFileName}&amp;pn={$pn}</image-echo>
let $imageScaler := <image-scaler>{$nausikaaURLScaler}?fn={$imageFileName}&amp;pn={$pn}</image-scaler>
let $imageFileNameUrl := concat($nausikaaURLDlInfo, "?fn=", $imageFileName)
let $testImageResult :=
    if ($mode = 'image' and $digilibAvailable) then doc($imageFileNameUrl)
    else ()
let $testImageResultParamImgFn := string($testImageResult//parameter[@name='img.fn']/@value)
(: string flags, not booleans: they are written into the result as text :)
let $imageIsAvailable :=
    if ($testImageResultParamImgFn = '' and $reqPF = '') then 'false'
    else 'true'
(: document wide position of the first figure following pb1 :)
let $positionOfFirstFigureAfterPB1 :=
    if ($docbase = 'archimedes') then count($pb1/following::figure[1]/preceding::figure) + 1
    else if ($docbase = 'echo') then count($pb1/following::echo:figure[1]/preceding::echo:figure) + 1
    else if ($docbase = 'tei') then count($pb1/following::TEI:figure[1]/preceding::TEI:figure) + 1
    else ()

(: ---------- page fragment ---------- :)
let $pageFragmentTmp :=
    if ($mode = "image" or $errorCode > 9) then ()
    else if ($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") then
        util:get-fragment-between($pb1, $pb2, true())
    else ()
(: replace the soft hyphen (Unicode character 00AD) just before the line break by a normal hyphen;
   the soft hyphen is written as a character reference because it is invisible as a literal :)
(: delete the hyphen just before the line break in case of options=withoutLBs :)
let $pageFragment :=
    if (($mode = "text" or $mode = "textPollux") and not(contains($options, "withoutLBs")) and contains($pageFragmentTmp, "&#xAD;<lb")) then
        replace($pageFragmentTmp, "&#xAD;<lb", "-<lb")
    else if (($mode = "text" or $mode = "textPollux") and contains($options, "withoutLBs") and contains($pageFragmentTmp, "-<lb")) then
        replace($pageFragmentTmp, "-<lb", "<lb")
    else $pageFragmentTmp
(: without an explicit characterNormalization the text modes get 'reg,norm'
   and the xml modes stay unnormalized :)
let $pageFragmentNormalized :=
    if ($mode = "image" or $errorCode > 9) then ()
    else if (($mode = "text" or $mode = "textPollux" or $mode = "gis") and $charNorm = "") then
        mpdltext:normalizeChars('reg,norm', $language, $pageFragment)
    else if (($mode = "xml" or $mode = "pureXml") and $charNorm = "") then
        $pageFragment
    else if (($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") and $charNorm != "") then
        mpdltext:normalizeChars($charNorm, $language, $pageFragment)
    else ()
let $retPageFragment :=
    if ($mode = "image" or $errorCode > 9) then ()
    else if ($mode = "text" or $mode = "gis" or $mode = "xml" or $mode = "pureXml") then $pageFragmentNormalized
    else if ($mode = "textPollux") then mpdltext:dictionarize($pageFragmentNormalized, $language)
    else ()
(: returns a valid xml document for that string :)
let $returnPageFragmentTmp := util:parse($retPageFragment)

(: ---------- external elements attached to this page ----------
   The request is an object element serialized as a string; the inner double
   quotes are written as entity references so the string literals stay
   well-formed. :)
let $externalElementsTmpTmp :=
    mpdltext:externalObject(
        "read",
        "element",
        concat(
            "<object uid=&quot;joe&quot; documentId=&quot;", $mpdlDocUri,
            "&quot; xpointer=&quot;", "#xpointer(id(", "'page", $pn, "'", "))&quot;></object>"
        )
    )
let $externalElementsTmp :=
    if (not($externalElementsTmpTmp = "")) then util:parse($externalElementsTmpTmp)
    else ()
let $externalElements := $externalElementsTmp/result/object
let $containsExternalElements := exists($externalElements)
let $returnPageFragmentWithExtObjects :=
    <result>
        <externalElements>{$externalElements}</externalElements>
        <xpointer>{$xPointer}</xpointer>
        <fragment>{$returnPageFragmentTmp}</fragment>
    </result>
(: the extra stylesheet pass is only needed when node ids, external elements
   or an xpointer are involved :)
let $returnPageFragment :=
    if (contains($options, "withXmlNodeId") or $containsExternalElements or $xPointer != '') then
        mpdl-text:transform($returnPageFragmentWithExtObjects, concat($presentationPath, "/insertExternalElements.xsl"))
    else $returnPageFragmentTmp

(: ---------- objects referenced by anchors on this page ----------
   For each anchor the first element of the document with a matching
   xlink:label is delivered. :)
let $pageFigureAnchors := $returnPageFragment//anchor[@type = 'figure']
let $pageFigures :=
    for $pageFigureAnchor in $pageFigureAnchors
    let $href := string($pageFigureAnchor/@xlink:href)
    let $pageFigureTmp := $document//echo:figure[@xlink:label = $href]
    let $pageFigure := subsequence($pageFigureTmp, 1, 1)
    return $pageFigure
let $pageHandwrittenAnchors := $returnPageFragment//anchor[@type = 'handwritten']
let $pageHandwritten :=
    for $pageHandwrittenAnchor in $pageHandwrittenAnchors
    let $handwrittenHref := string($pageHandwrittenAnchor/@xlink:href)
    let $pageHandwrittenTmp := $document//echo:handwritten[@xlink:label = $handwrittenHref]
    let $pageHandwritten := subsequence($pageHandwrittenTmp, 1, 1)
    return $pageHandwritten
let $pageTableAnchors := $returnPageFragment//anchor[@type = 'table']
let $pageTables :=
    for $pageTableAnchor in $pageTableAnchors
    let $tableHref := string($pageTableAnchor/@xlink:href)
    let $pageTableTmp := $document//xhtml:table[@xlink:label = $tableHref]
    let $pageTable := subsequence($pageTableTmp, 1, 1)
    return $pageTable
let $pageNoteAnchors := $returnPageFragment//anchor[@type = 'note']
let $pageNotes :=
    if ($docbase = "echo") then
        for $pageNoteAnchor in $pageNoteAnchors
        let $noteHref := string($pageNoteAnchor/@xlink:href)
        let $pageNoteTmp := $document//echo:note[@xlink:label = $noteHref]
        let $pageNote := subsequence($pageNoteTmp, 1, 1)
        return $pageNote
    else $returnPageFragment//note

(: ---------- metadata handling: only metadata of the selected document is scanned ---------- :)
let $identifier := $documentIdentifier
let $authors := mpdl-lucene:getElementsByAttr($metadata, $docbase, "author")
let $titles := mpdl-lucene:getElementsByAttr($metadata, $docbase, "title")
let $places := mpdl-lucene:getElementsByAttr($metadata, $docbase, "place")
let $date := mpdl-lucene:getElementsByAttr($metadata, $docbase, "date")
let $rights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "rights")
let $accessRights := mpdl-lucene:getElementsByAttr($metadata, $docbase, "accessRights")
let $licenses := mpdl-lucene:getElementsByAttr($metadata, $docbase, "license")
let $file := mpdl-lucene:getElementsByAttr($metadata, $docbase, "file")
let $translator := mpdl-lucene:getElementsByAttr($metadata, $docbase, "translator")
let $version := mpdl-lucene:getElementsByAttr($metadata, $docbase, "version")

(: ---------- highlighting ----------
   Morphological forms and regularizations of the query are joined by "|"
   when both are present. :)
let $highlightQueryWordsTemp :=
    if ($highlightQuery != '') then mpdltext:get-query-morph-forms($language, $highlightQuery)
    else ''
let $highlightQueryRegularizations :=
    if ($highlightQuery != '') then mpdltext:get-query-regularizations($language, $highlightQuery)
    else ''
let $highlightQueryWords :=
    if ($highlightQueryWordsTemp != '' and $highlightQueryRegularizations = '') then
        $highlightQueryWordsTemp
    else if ($highlightQueryWordsTemp = '' and $highlightQueryRegularizations != '') then
        $highlightQueryRegularizations
    else if ($highlightQueryWordsTemp != '' and $highlightQueryRegularizations != '') then
        concat($highlightQueryWordsTemp, '|', $highlightQueryRegularizations)
    else ()

let $currentTimeEnd := util:system-time()
let $neededTime := mpdl-time:duration-as-ms($currentTimeEnd - $currentTimeBegin)

(: ---------- result xml (or error xml for error codes of 10 and more) ---------- :)
let $xmlResult :=
    if ($errorCode < 10) then
        <result>
            <document-description>
                <uri>{$mpdlDocUri}</uri>
                <collection-name>{$docbase}</collection-name>
                <document-name>{$documentName}</document-name>
                <language>{$language}</language>
                <authors>{$authors}</authors>
                <titles>{$titles}</titles>
                <places>{$places}</places>
                <date>{$date}</date>
                <identifier>{$identifier}</identifier>
                <rights>{$rights}</rights>
                <accessRights>{$accessRights}</accessRights>
                <licenses>{$licenses}</licenses>
                <file>{$file}</file>
                <translator>{$translator}</translator>
                <version>{$version}</version>
                <count-pages>{$countPages}</count-pages>
                <count-places>{$countGisPlaces}</count-places>
                <count-toc-entries>{$countTocEntries}</count-toc-entries>
                <count-figure-entries>{$countFigureEntries}</count-figure-entries>
            </document-description>
            <page>
                <mode>{$mode}</mode>
                <number>{$pn}</number>
                <header>{$pageHeader}</header>
                <number-orig>{$pageNumberOrig}</number-orig>
                <number-orig-norm>{$pageNumberOrigNorm}</number-orig-norm>
                <sentence-number>{$sn}</sentence-number>
                <digilib-available>{$digilibAvailable}</digilib-available>
                <image-available>{$imageIsAvailable}</image-available>
                <image-file-name>{$imageFileName}</image-file-name>
                {$imageEcho}
                {$imageScaler}
                <xml-url>?document={$documentName}&amp;pn={$pn}&amp;mode=xml</xml-url>
                <page-image-directory>{$imagesDocDirectory}/{$pageImageDirectory}</page-image-directory>
                <figures-image-directory>{$imagesDocDirectory}/{$figuresImageDirectory}</figures-image-directory>
                <firstFigurePosition>{$positionOfFirstFigureAfterPB1}</firstFigurePosition>
                <figures>{$pageFigures}</figures>
                <handwritten>{$pageHandwritten}</handwritten>
                <tables>{$pageTables}</tables>
                <notes>{$pageNotes}</notes>
                <highlights>
                    <query>{$highlightQuery}</query>
                    <words>{$highlightQueryWords}</words>
                </highlights>
                <content>{$returnPageFragment}</content>
                <character-normalization>{$charNorm}</character-normalization>
                <options>{$options}</options>
            </page>
            <performance>{$neededTime}</performance>
        </result>
    else if ($errorCode = 10) then
        <error><number>{$errorCode}</number><description>Can't find fulltext document: {$mpdlDocUri} </description></error>
    else if ($errorCode = 11) then
        <error><number>{$errorCode}</number><description>No result: Page {$pn} not found</description></error>
    else if ($errorCode = 12) then
        <error><number>{$errorCode}</number><description>View mode {$mode} not available</description></error>
    else
        <error><number>{$errorCode}</number><description>undefined error: {$errorCode}</description></error>

(: ---------- serialization and delivery ----------
   $declare is bound only for the side effect of setting the serialization
   option. Errors and all modes except pureXml are serialized as xhtml;
   everything else (pureXml included) as xml. :)
let $declare :=
    if ($errorCode > 9 or $mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml") then
        util:declare-option("exist:serialize", "method=xhtml media-type=text/html omit-xml-declaration=no indent=no encoding=utf-8")
    else
        util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")
let $xslFilePath :=
    if ($mode = "text" or $mode = "textPollux" or $mode = "gis" or $mode = "image" or $mode = "xml") then
        concat($presentationPath, "/pageFragmentHtml.xsl")
    else concat($presentationPath, "/pageXml.xsl")
let $titleStr := concat(string-join($authors, ', '), ". ", string-join($titles, ', '), ". ", string-join($places, ', '), " ", $date, ".")
let $tmpResult :=
    if ($errorCode < 10 and $reqExport = "pdf") then
        mpdl-text:html2pdf($language, $xmlResult, $xslFilePath, $titleStr, $pn, $mode)
    else if ($errorCode < 10 and not($reqExport = "pdf")) then
        mpdl-text:transform($xmlResult, $xslFilePath)
    else
        (: error xml result :)
        <div>{$xmlResult}</div>
let $result :=
    if ($errorCode < 10 and $reqExport = "pdf") then
        response:stream-binary($tmpResult, "application/pdf", concat($documentName, "-page", $pn, ".pdf"))
    else $tmpResult
(: bound only for the side effect of setting the response header :)
let $setHeader :=
    if ($mode = "pureXml") then
        response:set-header('Content-Disposition', concat('filename=', $documentName, '-page', $pn))
    else ()
return $result