Mercurial > hg > mpdl-group
view software/eXist/webapp/mpdl/presentation/functions-text.xsl @ 7:5589d865af7a
Erstellung XQL/XSL Applikation
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 08 Feb 2011 15:16:46 +0100 |
parents | |
children | 7e883ce72fec |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Text helper functions (prefix "text") for the presentation layer:
  string clipping and trimming, "|"-separated list utilities, translation of a
  Lucene query string into a "|"-separated list of regular expression terms,
  and highlighting of term/word hits inside a string.

  Note on return kinds: functions that emit their result through xsl:value-of
  return text nodes. Callers that wrap the call in xsl:value-of get adjacent
  text nodes merged without a separator; several functions below rely on that.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xlink="http://www.w3.org/1999/xlink"
                xmlns:saxon="http://saxon.sf.net/"
                xmlns:functx="http://www.functx.com"
                xmlns:text="http://www.mpiwg-berlin.mpg.de/ns/mpdl/text"
                xmlns:xs="http://www.w3.org/2001/XMLSchema">

  <xsl:import href="/db/mpdl/presentation/functions-functx.xsl"/>

  <!--
    Delivers $char repeated $count times.
    For $count below 1 a single copy of $char is returned (this is what the
    earlier recursive implementation did, so it is kept for compatibility).
    Implemented with string-join instead of recursion so that large counts do
    not depend on recursion depth.
  -->
  <xsl:function name="text:nchars">
    <xsl:param name="count" as="xs:integer"/>
    <xsl:param name="char" as="xs:string"/>
    <xsl:sequence select="string-join(for $i in 1 to max(($count, 1)) return $char, '')"/>
  </xsl:function>

  <!--
    Splits $inputString at "|", sorts the parts with functx:sort and delivers
    them joined with ", ". An empty input delivers an empty string.
  -->
  <xsl:function name="text:sortWithComma">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:value-of select="string-join(functx:sort(tokenize($inputString, '\|')), ', ')"/>
  </xsl:function>

  <!--
    inputString1 and inputString2 each contain a list of strings separated by "|".
    Delivers the entries of inputString1 that do not occur in inputString2,
    in their original order and again separated by "|".
    Repeated entries inside inputString1 itself are not collapsed.
  -->
  <xsl:function name="text:removeDuplicates">
    <xsl:param name="inputString1" as="xs:string"/>
    <xsl:param name="inputString2" as="xs:string"/>
    <xsl:variable name="candidates" select="tokenize($inputString1, '\|')"/>
    <xsl:variable name="exclusions" select="tokenize($inputString2, '\|')"/>
    <xsl:value-of select="string-join($candidates[not(text:contained(., $exclusions))], '|')"/>
  </xsl:function>

  <!--
    True if $arg is equal (fn:compare delivers 0) to at least one of $searchStrings.
    An empty $arg or an empty $searchStrings delivers false.
  -->
  <xsl:function name="text:contained" as="xs:boolean">
    <xsl:param name="arg" as="xs:string?"/>
    <xsl:param name="searchStrings" as="xs:string*"/>
    <xsl:sequence select="some $searchString in $searchStrings satisfies compare($arg, $searchString) = 0"/>
  </xsl:function>

  <!--
    Clips the beginning of $inputString, keeping its tail.
    If the string is longer than $cutLength the result is " (...) " followed by
    the tail (taken from position length - cutLength) with its first, possibly
    partial, word removed by text:cutFirstWord. Otherwise the string is
    delivered unchanged (a start position of 0 or less selects the whole string).
  -->
  <xsl:function name="text:cutStringBefore">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:param name="cutLength" as="xs:integer"/>
    <xsl:variable name="length" select="string-length($inputString)"/>
    <xsl:variable name="cutString" select="substring($inputString, $length - $cutLength)"/>
    <xsl:choose>
      <xsl:when test="$length &gt; $cutLength">
        <xsl:text> (...) </xsl:text>
        <xsl:value-of select="text:cutFirstWord($cutString)"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$cutString"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <!--
    Clips the end of $inputString, keeping its head.
    If the string is longer than $cutLength the result is the head with its
    last, possibly partial, word removed by text:cutLastWord, followed by
    " (...) ". Otherwise the string is delivered unchanged.
  -->
  <xsl:function name="text:cutStringAfter">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:param name="cutLength" as="xs:integer"/>
    <xsl:variable name="length" select="string-length($inputString)"/>
    <!-- substring with start position 0 delivers the characters 1 to cutLength - 1 -->
    <xsl:variable name="cutString" select="substring($inputString, 0, $cutLength)"/>
    <xsl:choose>
      <xsl:when test="$length &gt; $cutLength">
        <xsl:value-of select="text:cutLastWord($cutString)"/>
        <xsl:text> (...) </xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <!-- Not clipped: deliver the input itself. Using $cutString here would
             drop the last character when the length equals $cutLength. -->
        <xsl:value-of select="$inputString"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <!--
    Replaces everything from a line start up to and including the first
    delimiter (whitespace or one of : . , ; ! _) with a single space.
    The "m" flag makes this apply at the start of every line of the input.
  -->
  <xsl:function name="text:cutFirstWord">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:value-of select="replace($inputString, '^.*?[\s:.,;!_]', ' ', 'im')"/>
  </xsl:function>

  <!--
    Replaces the last delimiter (whitespace or one of : . , ; ! _) of a line
    and everything after it with a single space.
  -->
  <xsl:function name="text:cutLastWord">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:value-of select="replace($inputString, '(.*)[\s:.,;!_].*$', '$1 ', 'im')"/>
  </xsl:function>

  <!-- Removes leading and trailing whitespace from $inputString -->
  <xsl:function name="text:trim">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:value-of select="replace(replace($inputString, '^\s+', ''), '\s+$', '')"/>
  </xsl:function>

  <!-- Word delimiter: not tested yet -->
  <!-- TODO: bol, eol, ", &, <, > -->
  <!-- <xsl:variable name="wordDelimRegExpr" select="'[\s\(\)\[\]\.\\\{\}\$\^\+\?\!\* §%:,;=/]+'"/> -->

  <!--
    Tokenization of a Lucene query with phrases and words to a list of query
    terms separated with "|".

    Steps:
      1. parentheses that are not preceded by a backslash are deleted
      2. backslash-escaped Lucene special characters are unescaped
      3. characters with a meta meaning in regular expressions are replaced by
         ###...### placeholders; a "~" with following digits/commas becomes
         ###Tilde###
      4. the result is tokenized by text:translateLuceneQueryToTermsMain

    NOTE(review): step 1 consumes the character in front of each parenthesis,
    so of two directly adjacent parentheses in the middle of the query only the
    first one is deleted by it.
  -->
  <xsl:function name="text:translateLuceneToTerms">
    <xsl:param name="inputLuceneQuery" as="xs:string"/>
    <!-- Delete unescaped parentheses: (a AND b) OR c -> a AND b OR c -->
    <xsl:variable name="noParens1" select="replace($inputLuceneQuery, '([^\\])[()]', '$1')"/>
    <xsl:variable name="noParens2" select="replace($noParens1, '^[()]', '')"/>
    <!-- Unescape special chars in Lucene query -->
    <xsl:variable name="unescaped" select="replace($noParens2, '\\([-\+&amp;|!(){}\[\]\^&quot;~*?:\\])', '$1')"/>
    <!-- Mask special chars which have a meta meaning in regular expressions -->
    <xsl:variable name="masked1" select="replace($unescaped, '[*]', '###Star###')"/>
    <xsl:variable name="masked2" select="replace($masked1, '[\+]', '###Plus###')"/>
    <xsl:variable name="masked3" select="replace($masked2, '[?]', '###QM###')"/>
    <xsl:variable name="masked4" select="replace($masked3, '[\.]', '###Dot###')"/>
    <xsl:variable name="masked5" select="replace($masked4, '[\^]', '###BeginLine###')"/>
    <xsl:variable name="masked6" select="replace($masked5, '[$]', '###EndLine###')"/>
    <xsl:variable name="masked7" select="replace($masked6, '[|]', '###Or###')"/>
    <xsl:variable name="masked8" select="replace($masked7, '[(]', '###Paranthes1Open###')"/>
    <xsl:variable name="masked9" select="replace($masked8, '[)]', '###Paranthes1Close###')"/>
    <xsl:variable name="masked10" select="replace($masked9, '[{]', '###Paranthes2Open###')"/>
    <xsl:variable name="masked11" select="replace($masked10, '[}]', '###Paranthes2Close###')"/>
    <xsl:variable name="masked12" select="replace($masked11, '[\[]', '###Paranthes3Open###')"/>
    <xsl:variable name="masked13" select="replace($masked12, '[\]]', '###Paranthes3Close###')"/>
    <xsl:variable name="masked14" select="replace($masked13, '~[0123456789,]*', '###Tilde###')"/>
    <xsl:value-of select="text:translateLuceneQueryToTermsMain($masked14, 'DUMMYDUMMYSTART')"/>
  </xsl:function>

  <!--
    Recursive tokenization of a masked Lucene query (see
    text:translateLuceneToTerms) into query terms separated with "|".

    inputString:     the not yet processed rest of the query
    lastFetchedTerm: the term delivered by the previous step (used for the
                     Lucene operator NOT); 'DUMMYDUMMYSTART' on the first call

    Each step handles the first phrase or word of inputString and recurses on
    the trimmed rest. Terms that must not be highlighted are still emitted, but
    prefixed with 'DUMMYDUMMY'.
  -->
  <xsl:function name="text:translateLuceneQueryToTermsMain">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:param name="lastFetchedTerm" as="xs:string"/>
    <xsl:choose>
      <!-- single phrase, optionally preceded by "+": phrase is appended -->
      <xsl:when test="matches($inputString, '^(###Plus###)?&quot;[^&quot;]*?&quot;$')">
        <xsl:variable name="phrase" select="substring-before(substring-after($inputString, '&quot;'), '&quot;')"/>
        <xsl:variable name="phraseEscaped" select="text:escapeLucenePhraseForRegExpr($phrase)"/>
        <xsl:value-of select="text:translateLuceneQueryTermToTerm($phraseEscaped, $lastFetchedTerm)"/>
      </xsl:when>
      <!-- "-" followed by a single phrase: phrase is not appended -->
      <xsl:when test="matches($inputString, '^-&quot;[^&quot;]*?&quot;$')">
        <xsl:value-of select="'DUMMYDUMMYMINUS'"/>
      </xsl:when>
      <!-- TODO: single phrase followed by near operator (~[0123456789]+): determine distance between -->
      <!-- NOTE(review): text:translateLuceneToTerms masks every "~" as ###Tilde###
           before calling this function, so this branch is only reachable when
           this function is called directly with an unmasked query. -->
      <xsl:when test="matches($inputString, '^&quot;[^&quot;]*?&quot;~[0123456789]+$')">
        <xsl:value-of select="$inputString"/>
      </xsl:when>
      <!-- phrase, optionally preceded by "+", followed by something: phrase is appended -->
      <xsl:when test="matches($inputString, '^(###Plus###)?&quot;.*?&quot;\s')">
        <xsl:variable name="rest" select="text:trim(replace($inputString, '^(###Plus###)?&quot;.*?&quot;\s', ''))"/>
        <xsl:variable name="phrase" select="substring-before(substring-after($inputString, '&quot;'), '&quot;')"/>
        <xsl:variable name="phraseEscaped" select="text:escapeLucenePhraseForRegExpr($phrase)"/>
        <xsl:variable name="term" select="text:translateLuceneQueryTermToTerm($phraseEscaped, $lastFetchedTerm)"/>
        <xsl:value-of select="concat($term, '|')"/>
        <!-- Recursive call of this function with the substring after the first term -->
        <xsl:value-of select="text:translateLuceneQueryToTermsMain($rest, $term)"/>
      </xsl:when>
      <!-- "-" followed by phrase followed by something: phrase is not appended -->
      <xsl:when test="matches($inputString, '^-&quot;.*?&quot;\s')">
        <xsl:variable name="rest" select="text:trim(replace($inputString, '^-&quot;.*?&quot;\s', ''))"/>
        <!-- Recursive call of this function with the substring after the first term -->
        <xsl:value-of select="text:translateLuceneQueryToTermsMain($rest, 'DUMMYDUMMYMINUS')"/>
      </xsl:when>
      <!-- single word: without quotes and spaces: word is appended -->
      <xsl:when test="matches($inputString, '^[^&quot;\s]*$')">
        <xsl:variable name="wordEscaped" select="text:escapeLuceneTermForRegExpr($inputString)"/>
        <xsl:value-of select="text:translateLuceneQueryTermToTerm($wordEscaped, $lastFetchedTerm)"/>
      </xsl:when>
      <!-- word followed by something: word is appended -->
      <xsl:when test="matches($inputString, '^[^&quot;\s]*?\s')">
        <xsl:variable name="afterFirstTerm" select="replace($inputString, '^[^&quot;\s]*?\s', '')"/>
        <!-- the first term is whatever the replace above has removed from the front -->
        <xsl:variable name="firstTerm"
                      select="substring($inputString, 1, string-length($inputString) - string-length($afterFirstTerm))"/>
        <!-- treat single Lucene term: special Lucene characters in query term ("*", "+", ".", "-") -->
        <xsl:variable name="wordEscaped" select="text:escapeLuceneTermForRegExpr(text:trim($firstTerm))"/>
        <xsl:variable name="term" select="text:translateLuceneQueryTermToTerm($wordEscaped, $lastFetchedTerm)"/>
        <xsl:value-of select="concat($term, '|')"/>
        <!-- Recursive call of this function with the substring after the first term -->
        <xsl:value-of select="text:translateLuceneQueryToTermsMain(text:trim($afterFirstTerm), $term)"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="''"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <!--
    Prepares a single masked Lucene word for use in a regular expression:
    the placeholders for "*", "+" and "~" are deleted, the placeholder for the
    Lucene mask symbol "?" is replaced with the regular expression symbol ".".
  -->
  <xsl:function name="text:escapeLuceneTermForRegExpr">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:variable name="withoutStar" select="replace($inputString, '###Star###', '')"/>
    <xsl:variable name="withoutPlus" select="replace($withoutStar, '###Plus###', '')"/>
    <xsl:variable name="withoutTilde" select="replace($withoutPlus, '###Tilde###', '')"/>
    <xsl:value-of select="replace($withoutTilde, '###QM###', '.')"/>
  </xsl:function>

  <!--
    Prepares a masked Lucene phrase for use in a regular expression. Inside a
    phrase "*" and "?" are literal characters: their placeholders are replaced
    with the backslash-escaped characters. The placeholder for "+" is deleted.
  -->
  <xsl:function name="text:escapeLucenePhraseForRegExpr">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:variable name="starEscaped" select="replace($inputString, '###Star###', '\\*')"/>
    <xsl:variable name="withoutPlus" select="replace($starEscaped, '###Plus###', '')"/>
    <xsl:value-of select="replace($withoutPlus, '###QM###', '\\?')"/>
  </xsl:function>

  <!--
    Last special char replacements and logical operator handling.

    The remaining ###...### placeholders are replaced with the corresponding
    backslash-escaped regular expression characters. The term is delivered as
    is unless it is one of the operators AND, OR, NOT, starts with "-", or the
    previous term was the NOT operator ('DUMMYDUMMYNOT'); in those cases it is
    delivered with the prefix 'DUMMYDUMMY'.

    inputString:     the masked term
    lastFetchedTerm: the term delivered by the previous step
  -->
  <xsl:function name="text:translateLuceneQueryTermToTerm">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:param name="lastFetchedTerm" as="xs:string"/>
    <!-- In a replacement string "\\" stands for one backslash and "\$" for a dollar sign -->
    <xsl:variable name="termEscaped1" select="replace($inputString, '###Dot###', '\\.')"/>
    <xsl:variable name="termEscaped2" select="replace($termEscaped1, '###BeginLine###', '\\^')"/>
    <xsl:variable name="termEscaped3" select="replace($termEscaped2, '###EndLine###', '\\\$')"/>
    <xsl:variable name="termEscaped4" select="replace($termEscaped3, '###Or###', '\\|')"/>
    <xsl:variable name="termEscaped5" select="replace($termEscaped4, '###Paranthes1Open###', '\\(')"/>
    <xsl:variable name="termEscaped6" select="replace($termEscaped5, '###Paranthes1Close###', '\\)')"/>
    <xsl:variable name="termEscaped7" select="replace($termEscaped6, '###Paranthes2Open###', '\\{')"/>
    <xsl:variable name="termEscaped8" select="replace($termEscaped7, '###Paranthes2Close###', '\\}')"/>
    <xsl:variable name="termEscaped9" select="replace($termEscaped8, '###Paranthes3Open###', '\\[')"/>
    <xsl:variable name="term" select="replace($termEscaped9, '###Paranthes3Close###', '\\]')"/>
    <xsl:choose>
      <xsl:when test="($term != 'AND') and ($term != 'OR') and ($term != 'NOT') and (substring($term, 1, 1) != '-') and ($lastFetchedTerm != 'DUMMYDUMMYNOT')">
        <xsl:value-of select="$term"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="concat('DUMMYDUMMY', $term)"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <!--
    Highlight all term occurrences. Result is a sequence of text and
    highlighted nodes (with span).
    Example: LE<span ...>MECHANICHE</span>...<span ...>Mechaniche</span>...

    inputStr: the string to highlight in; line feeds are removed before matching
    terms:    regular expression (typically alternatives separated by "|")
              matched anywhere in the string; '' for none
    words:    regular expression matched only between delimiters (whitespace or
              one of : . , ; ! _) or at a line start / line end; '' for none
    clipped:  'true' if the text around the hits is to be clipped

    Matching is case-insensitive. If both terms and words are empty the input
    string is delivered unchanged.
  -->
  <xsl:function name="text:highlight">
    <xsl:param name="inputStr" as="xs:string"/>
    <xsl:param name="terms" as="xs:string"/>
    <xsl:param name="words" as="xs:string"/>
    <xsl:param name="clipped" as="xs:string"/>
    <xsl:variable name="hitBeginStr" select="'XXhitStartXX'"/>
    <xsl:variable name="hitEndStr" select="'XXhitEndXX'"/>
    <xsl:choose>
      <xsl:when test="$terms = '' and $words = ''">
        <xsl:sequence select="$inputStr"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:variable name="flatInput" select="replace($inputStr, '\n', '')"/>
        <!-- replace all term occurrences with surrounding begin and end marks (string operation) -->
        <xsl:variable name="markedTerms"
                      select="if ($terms != '')
                              then replace($flatInput, concat('(', $terms, ')'), concat($hitBeginStr, '$1', $hitEndStr), 'im')
                              else $flatInput"/>
        <!-- the same for words; the delimiters around a word are kept outside the marks -->
        <xsl:variable name="markedWords"
                      select="if ($words != '')
                              then replace($markedTerms,
                                           concat('([\s:.,;!_]+|^)', '(', $words, ')', '([\s:.,;!_]+|$)'),
                                           concat('$1', $hitBeginStr, '$2', $hitEndStr, '$3'), 'im')
                              else $markedTerms"/>
        <xsl:sequence select="text:highlightTerms($markedWords, $clipped, $hitBeginStr, $hitEndStr)"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

  <!--
    Convert an input string with hits (marked with begin and end marks) to a
    sequence of text nodes with highlight span nodes.

    inputString: the string with marked hits
    clipped:     'true' if the text in front of each hit is clipped with
                 text:cutStringBefore and the text after the last hit with
                 text:cutStringAfter (70 characters each)
    hitBeginStr: the mark in front of a hit
    hitEndStr:   the mark after a hit
  -->
  <xsl:function name="text:highlightTerms">
    <xsl:param name="inputString" as="xs:string"/>
    <xsl:param name="clipped" as="xs:string"/>
    <xsl:param name="hitBeginStr"/>
    <xsl:param name="hitEndStr"/>
    <xsl:variable name="substringBefore" select="substring-before($inputString, $hitEndStr)"/>
    <xsl:variable name="substringAfter" select="substring-after($inputString, $hitEndStr)"/>
    <xsl:variable name="beforeHitBeginString" select="substring-before($substringBefore, $hitBeginStr)"/>
    <xsl:variable name="hitTerm" select="substring-after($substringBefore, $hitBeginStr)"/>
    <xsl:choose>
      <xsl:when test="contains($inputString, $hitEndStr)">
        <!-- Prints the original part of the substring up to the first occurrence of a hit -->
        <xsl:choose>
          <xsl:when test="$clipped = 'true'">
            <xsl:value-of select="text:cutStringBefore($beforeHitBeginString, 70)"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="$beforeHitBeginString"/>
          </xsl:otherwise>
        </xsl:choose>
        <!-- Highlight the hit -->
        <span class="hit highlight">
          <xsl:value-of select="$hitTerm"/>
        </span>
        <!-- Recursive call of this function with the substring after the first
             occurrence of the hit: further occurrences of hits -->
        <xsl:sequence select="text:highlightTerms($substringAfter, $clipped, $hitBeginStr, $hitEndStr)"/>
      </xsl:when>
      <!-- if no occurrence of a hit could be found the whole string is printed -->
      <xsl:otherwise>
        <xsl:choose>
          <xsl:when test="$clipped = 'true'">
            <xsl:value-of select="text:cutStringAfter($inputString, 70)"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:value-of select="$inputString"/>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:function>

</xsl:stylesheet>