view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexica.java @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children e845310098ba
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.lt.dict.app;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;

/*
Record counts per lexicon (in parentheses: number of records that are not valid XML):
florio: 70091 records (6 of them are not valid XML)
bonitz: 14648 records (46 of them are not valid XML)
webster: 111733 records (3 of them are not valid XML)
ls: 53500 records (14 of them are not valid XML)
autenrieth: 10158 records (468 of them are not valid XML)
cooper: 33124 records (116 of them are not valid XML)
baretti: 53555 records (0 of them are not valid XML)
salmone: 6360 records (11 of them are not valid XML)
lsj: 112631 records (26922 of them are not valid XML)
 */
/**
 * Registry of the dictionaries (lexica) known to the application.
 *
 * <p>Lexica are held in two static maps keyed by lexicon name: the "local" ones and the
 * "remote" ones. Remote lexica are the ones on which {@code setType("remote")} is called
 * during initialization; all of them carry a query URL.
 *
 * <p>Use {@link #getInstance()} to obtain the shared instance; the registries are filled the
 * first time it is called.
 *
 * <p>Several lookup methods return {@code null} (not an empty list) when nothing matches;
 * this contract is kept for backward compatibility with existing callers.
 */
public class Lexica {
  private static Lexica instance;
  private static final HashMap<String, Lexicon> localLexica = new HashMap<String, Lexicon>();
  private static final HashMap<String, Lexicon> remoteLexica = new HashMap<String, Lexicon>();

  /**
   * Returns the shared {@code Lexica} instance, creating and initializing it on first use.
   *
   * <p>The method is synchronized and the instance is only published after {@code init()} has
   * completed, so that no caller can observe a non-null instance with unfilled registries.
   *
   * @return the shared, fully initialized instance
   */
  public static synchronized Lexica getInstance() {
    if (instance == null) {
      Lexica lexica = new Lexica();
      lexica.init();
      instance = lexica;
    }
    return instance;
  }

  /** Fills the local and the remote registry. */
  private void init() {
    initLocalLexica();
    initRemoteLexica();
  }

  /** Creates the local lexica and registers them under their names. */
  private void initLocalLexica() {
    Lexicon autenrieth = new Lexicon("autenrieth", "el");
    autenrieth.setDescription("Autenrieth, a Homeric lexicon");
    autenrieth.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0073:entry=");
    Lexicon baretti = new Lexicon("baretti", "it");
    baretti.setDescription("Baretti, a dictionary of the English and Italian languages");
    Lexicon bonitz = new Lexicon("bonitz", "el");
    bonitz.setDescription("Bonitz, index Aristotelicus");
    Lexicon cooper = new Lexicon("cooper", "la");
    cooper.setDescription("Cooper, Thesaurus Linguae Romanae et Brittanicae");
    Lexicon florio = new Lexicon("florio", "it");
    florio.setDescription("Florio, a worlde of wordes, or most copious, dictionarie in Italian and English");
    Lexicon ls = new Lexicon("ls", "la");
    ls.setDescription("Lewis and Short, Latin dictionary");
    ls.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0059:entry=");
    Lexicon lsj = new Lexicon("lsj", "el");
    lsj.setDescription("Liddell-Scott-Jones, a Greek-English lexicon");
    lsj.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry=");
    Lexicon salmone = new Lexicon("salmone", "ar");
    salmone.setDescription("Salmone, an advanced learner's Arabic-English dictionary");
    salmone.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0005:entry=");
    // NOTE(review): salmoneUnicode is created but never registered in localLexica.
    // Kept as in the original; confirm whether the missing registration is intentional.
    Lexicon salmoneUnicode = new Lexicon("salmoneUnicode", "ar");
    salmoneUnicode.setDescription("Salmone, an advanced learner's Arabic-English dictionary");
    Lexicon webster = new Lexicon("webster", "en");
    webster.setDescription("Webster's revised unabridged dictionary (1913)");
    localLexica.put("autenrieth", autenrieth);
    localLexica.put("baretti", baretti);
    localLexica.put("bonitz", bonitz);
    localLexica.put("cooper", cooper);
    localLexica.put("florio", florio);
    localLexica.put("ls", ls);
    localLexica.put("lsj", lsj);
    localLexica.put("salmone", salmone);
    localLexica.put("webster", webster);
  }

  /**
   * Creates the remote lexica (type "remote", each with a query URL) and registers them under
   * their names.
   *
   * <p>NOTE(review): some query URLs contain the XML entity {@code &amp;} instead of a plain
   * ampersand. Presumably the URLs are embedded into XML/HTML output; left unchanged, verify
   * against the consumers of {@code getQueryUrl}.
   */
  private void initRemoteLexica() {
    Lexicon dwds = new Lexicon("dwds", "de");
    // The o-umlaut is written as a Unicode escape: the original literal was mis-encoded
    // ("W\u0161rterbuch") and the escape is independent of the source file encoding.
    dwds.setDescription("Deutsches W\u00f6rterbuch der deutschen Sprache");
    dwds.setQueryUrl("http://www.dwds.de/search/?qu=");
    dwds.setType("remote");
    Lexicon slater = new Lexicon("slater", "el");
    slater.setDescription("William J. Slater, Lexicon to Pindar");
    slater.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0072:entry=");
    slater.setType("remote");
    Lexicon artflFr = new Lexicon("artfl-fr", "fr");
    artflFr.setDescription("The ARTFL project: Dictionnaires d'autrefois: French dictionaries of the 17th, 18th, 19th and 20th centuries");
    artflFr.setQueryUrl("http://machaut.uchicago.edu/?resource=frengdict&amp;action=search&amp;french=");
    artflFr.setType("remote");
    Lexicon artflFrEn = new Lexicon("artfl-fr-en", "fr");
    artflFrEn.setDescription("The ARTFL project: French - English dictionary");
    artflFrEn.setQueryUrl("http://artflx.uchicago.edu/cgi-bin/dicos/pubdico1look.pl?strippedhw=");
    artflFrEn.setType("remote");
    Lexicon lewis = new Lexicon("lewis", "la");
    lewis.setDescription("Charlton T. Lewis, an Elementary Latin Dictionary");
    lewis.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0060:entry=");
    lewis.setType("remote");
    Lexicon wikiwoordenboek = new Lexicon("wikiwoordenboek", "nl");
    wikiwoordenboek.setDescription("Wiktionary: WikiWoordenboek");
    wikiwoordenboek.setQueryUrl("http://nl.wiktionary.org/wiki/");
    wikiwoordenboek.setType("remote");
    Lexicon ctp = new Lexicon("ctp", "zh");
    ctp.setDescription("Chinese Text Project");
    ctp.setQueryUrl("http://ctext.org/dictionary.pl?if=en&amp;char=");
    ctp.setType("remote");
    Lexicon linyutan = new Lexicon("linyutan", "zh");
    linyutan.setDescription("Lin Yutang");
    linyutan.setQueryUrl("http://humanum.arts.cuhk.edu.hk/cgi-bin/agrep-lindict?query=");
    linyutan.setType("remote");
    Lexicon chineseUnicode = new Lexicon("chinese-unicode", "zh");
    chineseUnicode.setDescription("Unicode");
    chineseUnicode.setQueryUrl("http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint=");
    chineseUnicode.setType("remote");
    Lexicon chineseWiktionary = new Lexicon("chinese-wiktionary", "zh");
    chineseWiktionary.setDescription("Wiktionary");
    chineseWiktionary.setQueryUrl("http://en.wiktionary.org/wiki/");
    chineseWiktionary.setType("remote");
    remoteLexica.put("dwds", dwds);
    remoteLexica.put("slater", slater);
    remoteLexica.put("artfl-fr", artflFr);
    remoteLexica.put("artfl-fr-en", artflFrEn);
    remoteLexica.put("lewis", lewis);
    remoteLexica.put("wikiwoordenboek", wikiwoordenboek);
    remoteLexica.put("ctp", ctp);
    remoteLexica.put("linyutan", linyutan);
    remoteLexica.put("chinese-unicode", chineseUnicode);
    remoteLexica.put("chinese-wiktionary", chineseWiktionary);
  }

  /**
   * Looks up a lexicon by name, first among the local lexica, then among the remote ones.
   *
   * @param name the name the lexicon is registered under
   * @return the lexicon, or {@code null} if no lexicon is registered under that name
   */
  public Lexicon getLexicon(String name) {
    Lexicon lexicon = localLexica.get(name);
    if (lexicon == null) {
      lexicon = remoteLexica.get(name);
    }
    return lexicon;
  }

  /**
   * Returns the local lexica whose source language equals the language id that
   * {@code Language.getLanguageId} yields for {@code lang}.
   *
   * @param lang the language, as accepted by {@code Language.getLanguageId}
   * @return the matching local lexica, or {@code null} if there is none
   */
  public ArrayList<Lexicon> getLocalLexicons(String lang) {
    String language = Language.getInstance().getLanguageId(lang);
    return selectBySourceLanguage(localLexica, language);
  }

  /**
   * Returns the remote lexica whose source language equals the language id that
   * {@code Language.getLanguageId} yields for {@code lang}.
   *
   * @param lang the language, as accepted by {@code Language.getLanguageId}
   * @return the matching remote lexica, or {@code null} if there is none
   */
  public ArrayList<Lexicon> getRemoteLexicons(String lang) {
    String language = Language.getInstance().getLanguageId(lang);
    return selectBySourceLanguage(remoteLexica, language);
  }

  /**
   * Returns all lexica (local ones first, then remote ones) for the given language.
   *
   * @param lang the language, as accepted by {@code Language.getLanguageId}
   * @return the matching lexica; empty (never {@code null}) if there is none
   */
  public ArrayList<Lexicon> getLexicons(String lang) {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    ArrayList<Lexicon> localLexicons = getLocalLexicons(lang);
    if (localLexicons != null) {
      retLexicons.addAll(localLexicons);
    }
    ArrayList<Lexicon> remoteLexicons = getRemoteLexicons(lang);
    if (remoteLexicons != null) {
      retLexicons.addAll(remoteLexicons);
    }
    return retLexicons;
  }

  /**
   * Returns all registered local lexica.
   *
   * @return a new list with all local lexica, or {@code null} if none is registered
   */
  public ArrayList<Lexicon> getLocalLexicons() {
    if (localLexica.isEmpty()) {
      // Keep the original contract: null rather than an empty list.
      return null;
    }
    return new ArrayList<Lexicon>(localLexica.values());
  }

  /**
   * Returns the local lexica registered as "autenrieth", "bonitz" and "lsj", in that order.
   * Judging by the method name these are the lexica encoded in Betacode (not verifiable here).
   *
   * @return a new list with the three lexica
   */
  public ArrayList<Lexicon> getLocalBetacodeLexicons() {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    retLexicons.add(localLexica.get("autenrieth"));
    retLexicons.add(localLexica.get("bonitz"));
    retLexicons.add(localLexica.get("lsj"));
    return retLexicons;
  }

  /**
   * Returns the local lexicon registered as "salmone". Judging by the method name this is the
   * lexicon encoded in Buckwalter transliteration (not verifiable here).
   *
   * @return a new list with that single lexicon
   */
  public ArrayList<Lexicon> getLocalBuckwalterLexicons() {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    retLexicons.add(localLexica.get("salmone"));
    return retLexicons;
  }

  /**
   * Collects the lexica of a registry whose source language equals the given language id.
   *
   * @param lexica the registry to search
   * @param language the language id to match; lexica without a source language never match
   * @return the matching lexica in the registry's iteration order, or {@code null} if none
   */
  private static ArrayList<Lexicon> selectBySourceLanguage(HashMap<String, Lexicon> lexica, String language) {
    ArrayList<Lexicon> matches = null;
    for (Lexicon lexicon : lexica.values()) {
      String sourceLanguage = lexicon.getSourceLanguage();
      if (sourceLanguage != null && sourceLanguage.equals(language)) {
        if (matches == null) {
          matches = new ArrayList<Lexicon>();
        }
        matches.add(lexicon);
      }
    }
    return matches;
  }

}

/*  TODO
<option value="dwds">Das Digitale Wörterbuch der deutschen Sprache</option>
<option value="grimm">Deutsches Wörterbuch von J. und W. Grimm (experimental)</option>
<option value="artfl">Dictionnaire de l'Académie francaise, 4e éd. (1762)</option>
<option value="epsd">Pennsylvania Sumerian Dictionary</option>

  else if (dictname == "dwds") lang="de";   
  else if (dictname == "grimm") lang="de";  
  else if (dictname == "artfl") lang="fr"; 
  else if (dictname == "epsd") lang="sux";

DWDS:
  
Link: http://www.dwds.de/?woerterbuch=1&qu=auto   
Logo: http://www.dwds.de/images/dwds_logo.gif  
Copyright: Copyright &copy; by Berlin-Brandenburgische Akademie der Wissenschaften, Wörterbuch der deutschen Gegenwartssprache, all rights reserved.

Grimm:

Link: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/report_lemma?wb=G&word=auto  
View: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/selectarticles?lemid=  
Output:
<html>
<head>
<title>Deutsches Wörterbuch von Jacob und Wilhelm Grimm</title>
<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_wbb" type="text/css"></link>
<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_add" type="text/css"></link>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"></META>
</head>
<body>

ARTFL:

Name: Dictionnaire de l'Académie française, 4e éd.
Preprocessing of the word yourWord: $word =~ s/%([0-9A-F]{2})/pack("H2", $1)/ge;
Link: http://colet.uchicago.edu/cgi-bin/dico1look.pl?dicoid=ACAD1762&strippedhw=yourWord

EPSD:

Name: ePSD (Pennsylvania Sumerian Dictionary)
Link: http://psd.museum.upenn.edu/cgi-bin/epsd.plx?x=epsd&q=yourWord



 */