view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexica.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents 4a3641ae14d2
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.lt.dict.app;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;

/*
florio: 70091 records (6 of them are not xml valid)
bonitz: 14648 records (46 of them are not xml valid)
webster: 111733 records (3 of them are not xml valid)
ls: 53500 records (14 of them are not xml valid)
autenrieth: 10158 records (468 of them are not xml valid)
cooper: 33124 records (116 of them are not xml valid)
baretti: 53555 records (0 of them are not xml valid)
salmone: 6360 records (11 of them are not xml valid)
lsj: 112631 records (26922 of them are not xml valid)
 */
/**
 * Singleton registry of the lexica (dictionaries) known to this service.
 *
 * <p>Two groups are kept: "local" lexica and "remote" lexica. Remote lexica are
 * marked with the type {@code "remote"} and always carry a query URL; local
 * lexica carry a query URL only where one is configured below.
 *
 * <p>Several lookup methods return {@code null} (not an empty list) when nothing
 * matches; this is kept for compatibility with existing callers.
 */
public class Lexica {
  private static Lexica instance;
  /** Local lexica, keyed by lexicon name. */
  private static final HashMap<String, Lexicon> localLexica = new HashMap<String, Lexicon>();
  /** Names of the local lexica per source language code. */
  private static final HashMap<String, String[]> localLangLexiconNames = new HashMap<String, String[]>();
  /** Remote lexica, keyed by lexicon name. */
  private static final HashMap<String, Lexicon> remoteLexica = new HashMap<String, Lexicon>();

  /**
   * Returns the shared instance, creating and initializing it on first use.
   *
   * <p>The method is synchronized and the instance is published only after
   * {@link #init()} has completed, so concurrent first callers can neither run
   * the initialization twice nor observe half-filled registries.
   *
   * @return the fully initialized singleton
   */
  public static synchronized Lexica getInstance() {
    if (instance == null) {
      Lexica lexica = new Lexica();
      lexica.init();
      instance = lexica;
    }
    return instance;
  }

  /** Fills the local and remote registries. */
  private void init() {
    registerLocalLexica();
    registerLocalLexiconNames();
    registerRemoteLexica();
  }

  /** Registers the local lexica (registration order matches the original put order). */
  private static void registerLocalLexica() {
    addLocalLexicon("autenrieth", "el", "Autenrieth: A Homeric lexicon",
        "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0073:entry=");
    addLocalLexicon("baretti", "it", "Baretti: A dictionary of the English and Italian languages", null);
    addLocalLexicon("bonitz", "el", "Bonitz: Index Aristotelicus", null);
    addLocalLexicon("cooper", "la", "Cooper: Thesaurus Linguae Romanae et Brittanicae", null);
    addLocalLexicon("florio", "it",
        "Florio: a worlde of wordes, or most copious, dictionarie in Italian and English", null);
    addLocalLexicon("ls", "la", "Lewis and Short: Latin dictionary",
        "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0059:entry=");
    addLocalLexicon("lsj", "el", "Liddell-Scott-Jones: A Greek-English lexicon",
        "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry=");
    addLocalLexicon("salmone", "ar", "Salmone: An advanced learner's Arabic-English dictionary",
        "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0005:entry=");
    addLocalLexicon("webster", "en", "Webster's revised unabridged dictionary (1913)", null);
  }

  /** Registers, per language code, the names of the local lexica for that language. */
  private static void registerLocalLexiconNames() {
    String[] arLangLexNames = {"salmone"};
    localLangLexiconNames.put("ar", arLangLexNames);
    String[] elLangLexNames = {"autenrieth", "bonitz", "lsj"};
    localLangLexiconNames.put("el", elLangLexNames);
    String[] enLangLexNames = {"webster"};
    localLangLexiconNames.put("en", enLangLexNames);
    String[] itLangLexNames = {"baretti", "florio"};
    localLangLexiconNames.put("it", itLangLexNames);
    String[] laLangLexNames = {"cooper", "ls"};
    localLangLexiconNames.put("la", laLangLexNames);
  }

  /** Registers the remote lexica (registration order matches the original put order). */
  private static void registerRemoteLexica() {
    // Umlauts are written as Unicode escapes so the descriptions do not depend on
    // the encoding the source file is saved or compiled with.
    addRemoteLexicon("dwds", "de", "DWDS: Deutsches W\u00f6rterbuch der deutschen Sprache",
        "http://www.dwds.de/search/?qu=");
    addRemoteLexicon("leoDE", "de", "LEO: German - English Dictionary",
        "http://dict.leo.org/ende?lang=en&search=");
    addRemoteLexicon("dictLeipzigDE", "de", "Universit\u00e4t Leipzig: German - English Dictionary",
        "http://dict.uni-leipzig.de/index.php?wort=");
    addRemoteLexicon("lingueeDE", "de", "Linguee: German - English Dictionary",
        "http://www.linguee.de/deutsch-englisch/search?query=");
    addRemoteLexicon("slater", "el", "William J. Slater: Lexicon to Pindar",
        "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0072:entry=");
    addRemoteLexicon("leoEN", "en", "LEO: German - English Dictionary",
        "http://dict.leo.org/ende?lang=en&search=");
    addRemoteLexicon("lingueeEN", "en", "Linguee: German - English Dictionary",
        "http://www.linguee.de/deutsch-englisch/search?query=");
    addRemoteLexicon("leoFR", "fr", "LEO: German - French Dictionary",
        "http://dict.leo.org/frde?lang=en&search=");
    // NOTE(review): this URL and the "ctp" URL below contain "&amp;" while the
    // other URLs use a plain "&" - confirm which form the consumers expect.
    addRemoteLexicon("artfl-fr", "fr",
        "The ARTFL project: Dictionnaires d'autrefois: French dictionaries of the 17th, 18th, 19th and 20th centuries",
        "http://machaut.uchicago.edu/?resource=frengdict&amp;action=search&amp;french=");
    addRemoteLexicon("artfl-fr-en", "fr", "The ARTFL project: French - English dictionary",
        "http://artflx.uchicago.edu/cgi-bin/dicos/pubdico1look.pl?strippedhw=");
    addRemoteLexicon("leoIT", "it", "LEO: German - Italian Dictionary",
        "http://dict.leo.org/itde?lang=en&search=");
    addRemoteLexicon("lewis", "la", "Charlton T. Lewis: An Elementary Latin Dictionary",
        "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0060:entry=");
    addRemoteLexicon("wikiwoordenboek", "nl", "Wiktionary: WikiWoordenboek",
        "http://nl.wiktionary.org/wiki/");
    addRemoteLexicon("ctp", "zh", "Chinese Text Project: Chinese - English Dictionary",
        "http://ctext.org/dictionary.pl?if=en&amp;char=");
    // NOTE(review): registered under "ch" while the other Chinese lexica use "zh";
    // presumably this should be "zh" as well - confirm against Language.getLanguageId.
    addRemoteLexicon("leoCH", "ch", "LEO: German - Chinese Dictionary",
        "http://dict.leo.org/chde?lang=en&search=");
    addRemoteLexicon("linyutan", "zh", "Lin Yutang: Chinese - English Dictionary",
        "http://humanum.arts.cuhk.edu.hk/cgi-bin/agrep-lindict?query=");
    addRemoteLexicon("chinese-unicode", "zh", "Unicode.org: Chinese - English",
        "http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=");
    addRemoteLexicon("chinese-wiktionary", "zh", "Wiktionary: Chinese - English",
        "http://en.wiktionary.org/wiki/");
  }

  /** Creates a lexicon; the query URL is set only when one is given. */
  private static Lexicon newLexicon(String name, String sourceLang, String description, String queryUrl) {
    Lexicon lexicon = new Lexicon(name, sourceLang);
    lexicon.setDescription(description);
    if (queryUrl != null) {
      lexicon.setQueryUrl(queryUrl);
    }
    return lexicon;
  }

  /** Creates a lexicon and stores it in the local registry under its name. */
  private static void addLocalLexicon(String name, String sourceLang, String description, String queryUrl) {
    localLexica.put(name, newLexicon(name, sourceLang, description, queryUrl));
  }

  /** Creates a lexicon of type "remote" and stores it in the remote registry under its name. */
  private static void addRemoteLexicon(String name, String sourceLang, String description, String queryUrl) {
    Lexicon lexicon = newLexicon(name, sourceLang, description, queryUrl);
    lexicon.setType("remote");
    remoteLexica.put(name, lexicon);
  }

  /**
   * Collects the lexica of one registry whose source language equals the language
   * id that {@code Language.getLanguageId} yields for {@code lang}.
   *
   * @return the matching lexica in the registry's iteration order, or {@code null} if none match
   */
  private static ArrayList<Lexicon> findBySourceLanguage(HashMap<String, Lexicon> lexica, String lang) {
    String language = Language.getInstance().getLanguageId(lang);
    ArrayList<Lexicon> matches = null;
    for (Lexicon lexicon : lexica.values()) {
      String sourceLanguage = lexicon.getSourceLanguage();
      if (sourceLanguage != null && sourceLanguage.equals(language)) {
        if (matches == null) {
          matches = new ArrayList<Lexicon>();
        }
        matches.add(lexicon);
      }
    }
    return matches;
  }

  /**
   * Looks a lexicon up by name, trying the local registry first and the remote one second.
   *
   * @param name the lexicon name, e.g. {@code "lsj"}
   * @return the lexicon, or {@code null} if no lexicon of that name is registered
   */
  public Lexicon getLexicon(String name) {
    Lexicon lexicon = localLexica.get(name);
    if (lexicon == null) {
      lexicon = remoteLexica.get(name);
    }
    return lexicon;
  }

  /**
   * Returns the local lexica for a language.
   *
   * @param lang the language; it is passed through {@code Language.getLanguageId} before comparing
   * @return the matching local lexica (unsorted), or {@code null} if there are none
   */
  public ArrayList<Lexicon> getLocalLexicons(String lang) {
    return findBySourceLanguage(localLexica, lang);
  }

  /**
   * Returns the names of the local lexica for a language.
   *
   * @param lang the language; it is passed through {@code Language.getLanguageId} before the lookup
   * @return the registry's own array of names (callers should not modify it),
   *         or {@code null} if no names are registered for the language
   */
  public String[] getLocalLexiconNames(String lang) {
    String language = Language.getInstance().getLanguageId(lang);
    return localLangLexiconNames.get(language);
  }

  /**
   * Returns the remote lexica for a language, sorted by the natural ordering of {@code Lexicon}.
   *
   * @param lang the language; it is passed through {@code Language.getLanguageId} before comparing
   * @return the sorted matching remote lexica, or {@code null} if there are none
   */
  public ArrayList<Lexicon> getRemoteLexicons(String lang) {
    ArrayList<Lexicon> retLexicons = findBySourceLanguage(remoteLexica, lang);
    if (retLexicons != null) {
      Collections.sort(retLexicons);
    }
    return retLexicons;
  }

  /**
   * Returns all lexica for a language: the local ones first, then the (sorted) remote ones.
   *
   * @param lang the language
   * @return a new list, empty (never {@code null}) if nothing matches
   */
  public ArrayList<Lexicon> getLexicons(String lang) {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    ArrayList<Lexicon> localLexicons = getLocalLexicons(lang);
    if (localLexicons != null) {
      retLexicons.addAll(localLexicons);
    }
    ArrayList<Lexicon> remoteLexicons = getRemoteLexicons(lang);
    if (remoteLexicons != null) {
      retLexicons.addAll(remoteLexicons);
    }
    return retLexicons;
  }

  /**
   * Returns all local lexica.
   *
   * @return a new list of all local lexica, or {@code null} if the local registry is empty
   */
  public ArrayList<Lexicon> getLocalLexicons() {
    if (localLexica.isEmpty()) {
      return null;
    }
    return new ArrayList<Lexicon>(localLexica.values());
  }

  /**
   * Returns the local lexica "autenrieth", "bonitz" and "lsj", in that order
   * (presumably the ones whose entries use Betacode - confirm).
   *
   * @return a new list; an element is {@code null} if that lexicon is not registered
   */
  public ArrayList<Lexicon> getLocalBetacodeLexicons() {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    retLexicons.add(localLexica.get("autenrieth"));
    retLexicons.add(localLexica.get("bonitz"));
    retLexicons.add(localLexica.get("lsj"));
    return retLexicons;
  }

  /**
   * Returns the local lexicon "salmone"
   * (presumably the one whose entries use Buckwalter transliteration - confirm).
   *
   * @return a new one-element list; the element is {@code null} if the lexicon is not registered
   */
  public ArrayList<Lexicon> getLocalBuckwalterLexicons() {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    retLexicons.add(localLexica.get("salmone"));
    return retLexicons;
  }

}

/*  TODO
<option value="dwds">Das Digitale Wörterbuch der deutschen Sprache</option>
<option value="grimm">Deutsches Wörterbuch von J. und W. Grimm (experimental)</option>
<option value="artfl">Dictionnaire de l'Académie francaise, 4e éd. (1762)</option>
<option value="epsd">Pennsylvania Sumerian Dictionary</option>

  else if (dictname == "dwds") lang="de";   
  else if (dictname == "grimm") lang="de";  
  else if (dictname == "artfl") lang="fr"; 
  else if (dictname == "epsd") lang="sux";

DWDS:
  
Link: http://www.dwds.de/?woerterbuch=1&qu=auto   
Logo: http://www.dwds.de/images/dwds_logo.gif  
Copyright: Copyright &copy; by Berlin-Brandenburgische Akademie der Wissenschaften, Wörterbuch der deutschen Gegenwartssprache, all rights reserved.

Grimm:

Link: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/report_lemma?wb=G&word=auto  
View: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/selectarticles?lemid=  
Output:
<html>
<head>
<title>Deutsches Wörterbuch von Jacob und Wilhelm Grimm</title>
<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_wbb" type="text/css"></link>
<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_add" type="text/css"></link>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"></META>
</head>
<body>

ARTFL:

Name: Dictionnaire de l'Académie francaise, 4e éd.
Preprocessing of the word yourWord: $word =~ s/%([0-9A-F]{2})/pack("H2", $1)/ge;
Link: http://colet.uchicago.edu/cgi-bin/dico1look.pl?dicoid=ACAD1762&strippedhw=yourWord

EPSD:

Name: ePSD (Pennsylvania Sumerian Dictionary)
Link: http://psd.museum.upenn.edu/cgi-bin/epsd.plx?x=epsd&q=yourWord



 */