view software/eXist/webapp/mpdl/interface/lt/wordInfo.xql @ 11:d6f528ad5d96

TEI Unterstützung, Fehlerbehebungen, externe Objekte
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Fri, 11 Mar 2011 13:34:02 +0100
parents 5589d865af7a
children 469d927b9ca7
line wrap: on
line source

xquery version "1.0";

import module namespace mpdl-time = "http://www.mpiwg-berlin.mpg.de/ns/mpdl/util/time" at "../../util/time.xql";

declare namespace request="http://exist-db.org/xquery/request";
declare namespace xlink="http://www.w3.org/1999/xlink";

(: TODO: link lemma, form and further units in the info output to mpdl-proto (HTML and XML) :)

(: Request parameters; the second argument of request:get-parameter is the value used when the parameter is missing. :)
(: $type selects the sections to produce; the values tested further down are "all", "compact", "morph", "dict", "wiki" and "place". :)
let $type :=  request:get-parameter("type", "compact")
(: language code of the word; the codes tested further down are "ar", "de", "el", "fr", "la", "nl" and "zh" :)
let $language := request:get-parameter("language", "")
(: the word form to look up :)
let $word := request:get-parameter("word", "")
(: optional external link for the word as a place name; an empty string means "no place" :)
let $placeHref := request:get-parameter("placeHref", "")
(: output format; the values tested further down are "html", "xml" and "string" :)
let $output := request:get-parameter("output", "xml")

(: start timestamp, used at the end of the query to report the elapsed time :)
let $currentTimeBegin := util:system-time()

(: Morphology lookup for the requested word form in the requested language. :)
let $lemmas := mpdltext:get-lemmas-by-form-name($language, $word)
(: HTML rendering of the lemmas: one <li> per lemma, sorted by lemma name.
   For "ar", "el" and "la" an item carries a link to the Perseus morphology page;
   outside the compact view it also lists the lemma's forms sorted by form name.
   Without any lookup result a plain message string is returned instead. :)
let $htmlOrderedLemmas := 
  if (exists($lemmas))
  then
    for $entry in $lemmas/lemmas/lemma
      let $perseusLanguage := $language = ("ar", "el", "la")
      let $compact := $type = "compact"
      (: compact view shows the lemma-name node itself, the other views add the provider :)
      let $label := 
        if ($compact)
        then $entry/lemma-name
        else concat($entry/lemma-name, " (", $entry/provider, ")")
      let $formList := string-join(
        (for $f in $entry/forms/form
         order by $f/form-name
         return concat($f/form-name, " (", $f/provider, ")")),
        ", ")
      (: Perseus takes the language code as is for "ar" and "la", but "greek" for "el" :)
      let $perseusHref :=
        if ($language = ("ar", "la"))
        then concat("http://www.perseus.tufts.edu/hopper/morph?l=", $entry/lemma-name, "&amp;la=", $language)
        else if ($language = "el")
        then concat("http://www.perseus.tufts.edu/hopper/morph?l=", $entry/lemma-name, "&amp;la=greek")
        else ""
      let $item := 
        if ($perseusLanguage and $type = ("all", "morph"))
        then 
          <li><b>Lemma: </b>{$label} (external link to <a href="{$perseusHref}">www.perseus.tufts.edu</a>)
            <ul>{$formList}</ul>
          </li>
        else if ($compact)
        then
          if ($perseusLanguage)
          then <li><b>Lemma: </b>{$label} (external link to <a href="{$perseusHref}">www.perseus.tufts.edu</a>)</li>
          else <li><b>Lemma: </b>{$label}</li>
        else
          <li><b>Lemma: </b>{$label}
            <ul>{$formList}</ul>
          </li>
    order by $entry/lemma-name
    return $item
  else ("no lemmas found for your query")
(: XML rendering of the lemmas, sorted by lemma name: each lemma is rebuilt from its
   provider, language and lemma-name children, followed by a <forms> element that
   holds the lemma's forms sorted by form name. :)
let $xmlOrderedLemmas := 
  for $entry in $lemmas/lemmas/lemma
  order by $entry/lemma-name
  return
    <lemma>{
      $entry/provider,
      $entry/language,
      $entry/lemma-name,
      <forms size="{$entry/forms-size}">{
        for $f in $entry/forms/form
        order by $f/form-name
        return $f
      }</forms>
    }</lemma>
(: Lemma names sorted by name; an empty string stands in when the lookup returned nothing. :)
let $lemmasStrTmpTmp := 
  if (exists($lemmas))
  then
    for $entry in $lemmas/lemmas/lemma
    order by $entry/lemma-name
    return $entry/lemma-name
  else ""
(: the lemma names as one space-separated string :)
let $lemmasStrWithoutWord := string-join($lemmasStrTmpTmp, " ")
(: also the form itself is added, unless the joined lemma string already contains it as a substring :)
let $lemmasStrTmp := 
  if (contains($lemmasStrWithoutWord, $word))
  then $lemmasStrWithoutWord
  else concat($word, " ", $lemmasStrWithoutWord)
(: German only: rewrite the digraphs ae/oe/ue/ss to ä/ö/ü/ß.
   replace() returns its input unchanged when nothing matches, so no pre-check is required. :)
let $lemmasStr := 
  if ($language = "de")
  then replace(replace(replace(replace($lemmasStrTmp, "ae", "ä"), "oe", "ö"), "ue", "ü"), "ss", "ß")
  else $lemmasStrTmp
(: the individual query words, split at single spaces :)
let $lemmasStrTokenized := tokenize($lemmasStr, " ")

(: Local dictionary lookup with the lemma string as query; it is skipped
   (empty sequence) for every $type other than "all", "compact" and "dict". :)
let $dictionariesLocal := 
  if ($type = ("all", "compact", "dict"))
  then mpdltext:get-lex-entries-by-lucene-query($language, $lemmasStr)
  else ()
(: Static catalogue of the external (remote) dictionaries offered per language.
   Each <lexicon> carries a short <name>, which is the key used when the external
   link is built, and a human-readable <description>. Languages without a remote
   dictionary yield the empty sequence. :)
let $dictionariesRemoteTmp := 
  if ($language = "de")
  then
    <lexica>
      <lexicon>
        <name>dwds</name>
        <description>Digitales Wörterbuch der deutschen Sprache</description>
      </lexicon>
    </lexica>
  else if ($language = "el")
  then
    <lexica>
      <lexicon>
        <name>slater</name>
        <description>William J. Slater, Lexicon to Pindar</description>
      </lexicon>
    </lexica>
  else if ($language = "fr")
  then
    <lexica>
      <lexicon>
        <name>artfl-fr</name>
        <description>The ARTFL project: Dictionnaires d'autrefois: French dictionaries of the 17th, 18th, 19th and 20th centuries</description>
      </lexicon>
      <lexicon>
        <name>artfl-fr-en</name>
        <description>The ARTFL project: French - English dictionary</description>
      </lexicon>
    </lexica>
  else if ($language = "la")
  then
    <lexica>
      <lexicon>
        <name>lewis</name>
        <description>Charlton T. Lewis, an Elementary Latin Dictionary</description>
      </lexicon>
    </lexica>
  else if ($language = "nl")
  then
    <lexica>
      <lexicon>
        <name>wikiwoordenboek</name>
        <description>Wiktionary: WikiWoordenboek</description>
      </lexicon>
    </lexica>
  else if ($language = "zh")
  then
    <lexica>
      <lexicon>
        <name>ctp</name>
        <description>Chinese Text Project</description>
      </lexicon>
      <lexicon>
        <name>linyutan</name>
        <description>Lin Yutang</description>
      </lexicon>
      <lexicon>
        <name>chinese-unicode</name>
        <description>Unicode</description>
      </lexicon>
      <lexicon>
        <name>chinese-wiktionary</name>
        <description>Wiktionary</description>
      </lexicon>
    </lexica>
  else ()

(: Builds, for every remote lexicon of the current language, one <entry> per word in
   $lemmasStrTokenized. An entry holds no dictionary text, only a <directLink> to the
   external site inside <repaired-entry>.
   NOTE(review): {$lex/name} and {$lex/description} copy the element nodes themselves, so the
   result contains <name><name>..</name></name> (same for description) - confirm this nesting is intended. :)
let $dictionariesRemote := 
  for $lex in $dictionariesRemoteTmp//lexicon
    let $lexName := $lex/name
    let $lexEntries :=  
      for $l in $lemmasStrTokenized
        (: transcoded variant of the word; only the "slater" and "linyutan" links below use it.
           Presumably Betacode for Greek and Big5 for Chinese - verify against the mpdltext module. :)
        let $lLink := 
          if ($language = "el")
          then mpdltext:transcode("unicode", "betacode", $l)
          else if ($language = "zh")
          then mpdltext:encode-big5($l)
          else $l
        (: external URL for the word, chosen by lexicon name; unknown names give an empty string :)
        let $repairedEntryContentLink :=
          if ($lexName = "dwds")
          then concat("http://beta.dwds.de/?qu=", $l)
          else if ($lexName = "slater")
          then concat("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0072:entry=", $lLink)
          else if ($lexName = "artfl-fr-en")
          then concat("http://machaut.uchicago.edu/?resource=frengdict&amp;action=search&amp;french=", $l)
          else if ($lexName = "artfl-fr")
          then concat("http://artflx.uchicago.edu/cgi-bin/dicos/pubdico1look.pl?strippedhw=", $l)
          else if ($lexName = "lewis")
          then concat("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0060:entry=", $l)
          else if ($lexName = "wikiwoordenboek")
          then concat("http://nl.wiktionary.org/wiki/", $l)
          else if ($lexName = "linyutan")
          then concat("http://humanum.arts.cuhk.edu.hk/cgi-bin/agrep-lindict?query=", $lLink, "&amp;category=wholerecord")
          else if ($lexName = "ctp")
          then concat("http://ctext.org/dictionary.pl?if=en&amp;char=", $l)
          else if ($lexName = "chinese-wiktionary")
          then concat("http://en.wiktionary.org/wiki/", $l)
          else if ($lexName = "chinese-unicode")
          then concat("http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=", $l)
          else ""
        (: entry skeleton; the dictionary rendering later recognises link-only entries by the <directLink> child of <repaired-entry> :)
        let $lexiconEntry :=
          <entry>
            <form>{$l}</form>
            <content>
              <xml-valid>true</xml-valid>
              <original-entry>&lt;original-entry&gt;&lt;/original-entry&gt;</original-entry>
              <repaired-entry><directLink xlink:type="simple" xlink:href="{$repairedEntryContentLink}">{$l}</directLink></repaired-entry>
            </content>
          </entry>
      return $lexiconEntry
  return 
    <lexica>
      <lexicon>
        <name>{$lex/name}</name>
        <description>{$lex/description}</description>
        <entries>{$lexEntries}</entries>
      </lexicon>
    </lexica>

(: All lexicon elements, local ones first, then the remote link-only ones, under one <result>/<lexica>. :)
let $dictionaries := 
    <result>
    <lexica>
      {$dictionariesLocal//lexicon}
      {$dictionariesRemote//lexicon}
    </lexica>
    </result>
(: Renders every lexicon with its entries: an <li> with a nested <ul> for output "html",
   a <dictionaryEntries>/<dictionary> element for output "xml", nothing for other outputs.
   NOTE(review): $dictionaries is always a constructed <result> element, so the
   empty($dictionaries) branch can never be taken. :)
let $retDictionaries := 
  if (empty($dictionaries))
  then ()
  else
    for $dictionary in $dictionaries/lexica/lexicon
      let $dictName := $dictionary/name
      let $dictDescription := $dictionary/description
      let $dictEntries := $dictionary/entries
      let $entryDictEntryContent :=
        for $dictEntry in $dictionary/entries/entry
          let $dictEntryContent := $dictEntry/content
          let $dictEntryXmlValid := $dictEntryContent/xml-valid
          let $dictEntryOriginalContent := $dictEntryContent/original-entry
          let $dictEntryRepairedContent := $dictEntryContent/repaired-entry
          (: present only for the remote link-only entries :)
          let $dictEntryRepairedContentLink := $dictEntryRepairedContent/directLink
          (: entry form as used in the Perseus links below: transcoded for Greek and Arabic, unchanged otherwise :)
          let $dictEntryForm := 
            if ($language = "el")
            then mpdltext:transcode("unicode", "betacode", string($dictEntry/form))
            else if ($language = "ar")
            then mpdltext:transcode("unicode", "buckwalter", string($dictEntry/form))
            else $dictEntry/form
          (: entry body: a valid entry without link is parsed with util:parse, a valid entry with link
             becomes an anchor (html) or the directLink itself (xml), everything else falls back to
             a remark plus the original entry :)
          let $dictEntryContentParsedTmp := 
            if ($dictEntryXmlValid = "true" and empty($dictEntryRepairedContentLink))
            then util:parse($dictEntryRepairedContent)
            else if ($dictEntryXmlValid = "true" and not(empty($dictEntryRepairedContentLink)) and $output = "html")
            then <div>External link: <a href="{$dictEntryRepairedContentLink/@xlink:href}">{$dictEntryRepairedContentLink}</a></div>
            else if ($dictEntryXmlValid = "true" and not(empty($dictEntryRepairedContentLink)) and $output = "xml")
            then <div>{$dictEntryRepairedContentLink}</div>
            else <bla><text>[<i>Remark: entry in dictionary has no valid XML, so only the Betacode text version of the entry could be displayed</i>]</text><p></p>{$dictEntryOriginalContent}</bla>
          (: for the dictionaries named "ls", "lsj", "autenrieth", "buckwalter" and "salmone" a link to the same
             entry on www.perseus.tufts.edu is appended; "ls" uses the untranscoded form, the others $dictEntryForm :)
          let $dictEntryContentParsed := 
            if ($dictionary/name = "ls" and $output = "html")
            then <div>{$dictEntryContentParsedTmp}<br/>(external link to <a href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0059:entry={$dictEntry/form}">www.perseus.tufts.edu</a>)</div>
            else if ($dictionary/name = "ls" and $output = "xml")
            then <div>{$dictEntryContentParsedTmp}<directLink type="simple" xlink:href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0059:entry={$dictEntry/form}">www.perseus.tufts.edu</directLink></div>
            else if ($dictionary/name = "lsj" and $output = "html")
            then <div>{$dictEntryContentParsedTmp}<br/>(external link to <a href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry={$dictEntryForm}">www.perseus.tufts.edu</a>)</div>
            else if ($dictionary/name = "lsj" and $output = "xml")
            then <div>{$dictEntryContentParsedTmp}<directLink type="simple" xlink:href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry={$dictEntryForm}">www.perseus.tufts.edu</directLink></div>
            else if ($dictionary/name = "autenrieth" and $output = "html")
            then <div>{$dictEntryContentParsedTmp}<br/>(external link to <a href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0073:entry={$dictEntryForm}">www.perseus.tufts.edu</a>)</div>
            else if ($dictionary/name = "autenrieth" and $output = "xml")
            then <div>{$dictEntryContentParsedTmp}<directLink type="simple" xlink:href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0073:entry={$dictEntryForm}">www.perseus.tufts.edu</directLink></div>
            else if ($dictionary/name = "buckwalter" and $output = "html")
            then <div>{$dictEntryContentParsedTmp}<br/>(external link to <a href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0014:entry={$dictEntryForm}">www.perseus.tufts.edu</a>)</div>
            else if ($dictionary/name = "buckwalter" and $output = "xml")
            then <div>{$dictEntryContentParsedTmp}<directLink type="simple" xlink:href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0014:entry={$dictEntryForm}">www.perseus.tufts.edu</directLink></div>
            else if ($dictionary/name = "salmone" and $output = "html")
            then <div>{$dictEntryContentParsedTmp}<br/>(external link to <a href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0005:entry={$dictEntryForm}">www.perseus.tufts.edu</a>)</div>   
            else if ($dictionary/name = "salmone" and $output = "xml")
            then <div>{$dictEntryContentParsedTmp}<directLink type="simple" xlink:href="http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0005:entry={$dictEntryForm}">www.perseus.tufts.edu</directLink></div>
            else $dictEntryContentParsedTmp
          (: wrap the entry for the requested output format :)
          let $liDictEntryContentParsed := 
            if ($output = "html")
            then <li>{($dictEntryContentParsed)}</li>
            else if ($output = "xml")
            then <entry>{($dictEntryContentParsed)}</entry>
            else ()
        return $liDictEntryContentParsed
      (: wrap the whole dictionary for the requested output format :)
      let $dictLi := 
        if ($output = "html")
        then 
          <li><b>{$dictDescription}</b>
            <ul>{$entryDictEntryContent}</ul>
          </li>
        else if ($output = "xml")
        then 
          <dictionaryEntries>
          <dictionary>
            <name>{$dictName}</name>
            <description>{$dictDescription}</description>
            <entries>{$entryDictEntryContent}</entries>
          </dictionary>
          </dictionaryEntries>
        else ()
    return $dictLi

(: Wikipedia links for every query word (the word form plus its lemmas): a direct article
   link and a search link on the Wikipedia of the requested language. Rendered as <li> for
   output "html", as <article> for output "xml"; nothing for other outputs or when the
   morphology lookup returned nothing. :)
let $wikiArticles := 
  if (empty($lemmas))
  then ()
  else 
    for $l in $lemmasStrTokenized
      let $wikiHref1 := concat("http://", $language, ".wikipedia.org/wiki/", $l)
      let $wikiHref2 := concat("http://", $language, ".wikipedia.org/wiki/index.php?search=", $l)
      let $wikiArticle := 
        if ($output = "html")
        (: the closing parenthesis belongs to the sentence, not to the link text :)
        then <li><b>Article: </b>External link: <a href="{$wikiHref1}">{$l}</a> (or search for <a href="{$wikiHref2}">{$l}</a>)</li>
        else if ($output = "xml")
        then
          <article>
            <name>{$l}</name>
            <directLink xlink:type="simple" xlink:href="{$wikiHref1}"/>
            <searchLink xlink:type="simple" xlink:href="{$wikiHref2}"/>
          </article>
        else ()
    return $wikiArticle
(: Place information for the word, available only when a placeHref was passed in:
   an <li> with an anchor for output "html", a <place> element with an xlink for
   output "xml", the empty sequence otherwise. :)
let $places := 
  if ($placeHref != "" and $output = "html")
  then <li><b>Place: </b>External link: <a href="{$placeHref}">{$word}</a></li>
  else if ($placeHref != "" and $output = "xml")
  then
      <place>
        <name>{$word}</name>
        <directLink xlink:type="simple" xlink:href="{$placeHref}"/>
      </place>
  else ()

(: Dictionary section of the response, produced only for the types "all", "compact" and "dict".
   html: a headed list, with a placeholder text when there are no dictionary entries;
   xml: the rendered dictionaries themselves; any other output: nothing. :)
let $dict := 
  if (not($type = ("all", "compact", "dict")))
  then ()
  else if ($output = "html")
  then
    <p>
      <h3>Dictionary</h3>
      <ul>{
        if (empty($retDictionaries))
        then "No information available"
        else $retDictionaries
      }</ul>
    </p>
  else if ($output = "xml" and exists($retDictionaries))
  then $retDictionaries
  else ()
(: Morphology section of the response. For every language except "zh" and the types
   "all", "compact" and "morph":
   html: a headed list of the lemma items, or a placeholder text when there are none;
   xml: the sorted lemmas wrapped in <morphologyEntries> (only when lemma items exist).
   Independently of the language, type "morph" with output "string" yields the plain lemma string. :)
let $morph := 
  if ($language != "zh" and $type = ("all", "compact", "morph") and $output = "html")
  then 
    <p>
      <h3>Morphology</h3>
      <ul>{
        if (empty($htmlOrderedLemmas))
        then "No information available"
        else $htmlOrderedLemmas
      }</ul>
    </p>
  else if ($language != "zh" and exists($htmlOrderedLemmas) and $type = ("all", "compact", "morph") and $output = "xml")
  then <morphologyEntries>{$xmlOrderedLemmas}</morphologyEntries>
  else if ($type = "morph" and $output = "string")
  then $lemmasStrWithoutWord
  else ()
(: Wikipedia section of the response, produced only for the types "all" and "wiki"
   and only when there are article links: a headed list for output "html",
   a <wikiEntries> wrapper for output "xml". :)
let $wiki := 
  if (empty($wikiArticles) or not($type = ("all", "wiki")))
  then ()
  else if ($output = "html")
  then 
    <p>
      <h3>Wikipedia</h3>
      <ul>{$wikiArticles}</ul>
    </p>
  else if ($output = "xml")
  then <wikiEntries>{$wikiArticles}</wikiEntries>
  else ()
(: Place section of the response, produced only for the types "all", "compact" and "place"
   and only when place information exists: a headed list for output "html",
   a <placeEntries> wrapper for output "xml". :)
let $place := 
  if (empty($places) or not($type = ("all", "compact", "place")))
  then ()
  else if ($output = "html")
  then 
    <p>
      <h3>Place</h3>
      <ul>{$places}</ul>
    </p>
  else if ($output = "xml")
  then <placeEntries>{$places}</placeEntries>
  else ()

(: end timestamp; the difference to the start timestamp is converted with mpdl-time:duration-as-ms
   and shown as "Elapsed time" in the HTML page :)
let $currentTimeEnd := util:system-time()
let $neededTime := mpdl-time:duration-as-ms($currentTimeEnd - $currentTimeBegin)

(: $declare is never referenced; the binding only exists to call util:declare-option, which sets the
   exist:serialize option to HTML for output "html" or "string" and to XML for everything else :)
let $declare := 
  if ($output = "html" or $output = "string")
  then util:declare-option("exist:serialize", "method=html media-type=text/html omit-xml-declaration=no indent=yes encoding=utf-8")
  else util:declare-option("exist:serialize", "method=xml media-type=text/xml omit-xml-declaration=no indent=yes encoding=utf-8")

(: page heading and footnote text of the HTML response :)
let $queryResultHeaderStr := <h2>Word information for: {$word}</h2>
let $commentExternalLinks := 
   "[* external links may not function]"

(: XML response: the word form, the provider and whichever sections were produced above :)
let $retXmlResult :=
  <word>
    <form>{$word}</form>
    <provider>Max Planck Institute for the History of Science, Berlin</provider>
    {$morph}
    {$dict}
    {$wiki}
    {$place}
  </word>
(: HTML response: service banner with info pop-up, heading, the produced sections, the
   external-links footnote and a footer with the elapsed time and links to the source and bug tracker :)
let $retHtmlResult :=
  <html>
  <head>
  <title>Mpdl: word information</title>
  </head>
  <body>
  <table align="right" valign="top">
    <td>[<i>This is a MPDL language technology service</i>] <a href="../../info.xql?info=wordInfo" onclick="window.open(&quot;../../info.xql?info=wordInfo&quot;, &quot;InfoWindow&quot;, &quot;menubar=no,width=800,height=600,toolbar=yes,scrollbars=yes&quot;);return false"><img src="../../images/info.png" valign="bottom" width="15" height="15" border="0" alt="MPDL language technology service"/></a></td>
  </table>
  <p/>
  {$queryResultHeaderStr}
  {$morph} 
  {$dict} 
  {$wiki} 
  {$place} 
  {$commentExternalLinks} 
  <hr/>
  <p/>
  Elapsed time: {$neededTime} ms, see the <a href="/exist/xquery.xml">XQuery documentation</a> and the <a href="wordInfo.xql?_source=yes">XQuery source</a> of this page, if you find a bug <a href="https://itgroup.mpiwg-berlin.mpg.de:8080/tracs/mpdl-project-software/newticket">let us know</a>
  </body>
  </html>

(: Final response: the HTML page for output "html", the bare morphology result for
   type "morph" with output "string", the XML document in every other case. :)
return
  if ($output = "html")
  then $retHtmlResult
  else if ($type = "morph" and $output = "string")
  then $morph
  else $retXmlResult