view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/Lexica.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.lt.lex.app;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;

import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;

/*
Record counts per lexicon:
florio: 70091 records (6 of them are not valid XML)
bonitz: 14648 records (46 of them are not valid XML)
webster: 111733 records (3 of them are not valid XML)
ls: 53500 records (14 of them are not valid XML)
autenrieth: 10158 records (468 of them are not valid XML)
cooper: 33124 records (116 of them are not valid XML)
baretti: 53555 records (0 of them are not valid XML)
salmone: 6360 records (11 of them are not valid XML)
lsj: 112631 records (26922 of them are not valid XML)
 */
/**
 * Registry of the dictionaries (lexica) known to this module.
 *
 * <p>The registry is a lazily created singleton obtained through
 * {@link #getInstance()}. The lexicons themselves are kept in a static map keyed
 * by lexicon name, which is filled once when the singleton is first created.
 *
 * <p>Lookup methods that filter or enumerate the registry return {@code null}
 * (not an empty list) when there is nothing to return; callers must check for it.
 */
public class Lexica {
  private static Lexica instance;
  private static HashMap<String, Lexicon> lexica = new HashMap<String, Lexicon>();

  /**
   * Returns the shared registry, creating and populating it on first use.
   *
   * <p>The method is synchronized and the instance is only published after
   * {@code init()} has finished, so concurrent callers can never observe a
   * registry whose map is empty or only partly filled, and the map is never
   * populated by two threads at once.
   *
   * @return the fully initialized singleton
   */
  public static synchronized Lexica getInstance() {
    if (instance == null) {
      Lexica created = new Lexica();
      created.init();
      // Publish only after initialization succeeded.
      instance = created;
    }
    return instance;
  }

  /** Registers all built-in lexicons in the static map. */
  private void init() {
    register("autenrieth", "el", "Autenrieth, a Homeric lexicon");
    register("baretti", "it", "Baretti, a dictionary of the English and Italian languages");
    register("bonitz", "el", "Bonitz, index Aristotelicus");
    register("cooper", "la", "Cooper, Thesaurus Linguae Romanae et Brittanicae");
    register("florio", "it", "Florio, a worlde of wordes, or most copious, dictionarie in Italian and English");
    register("ls", "la", "Lewis and Short, Latin dictionary");
    register("lsj", "el", "Liddell-Scott-Jones, a Greek-English lexicon");
    register("salmone", "ar", "Salmoné, an advanced learner's Arabic-English dictionary");
    // NOTE(review): the previous code also built a "salmoneUnicode" lexicon ("ar",
    // same description as "salmone") but never put it into the map. It is left
    // unregistered here to keep behavior unchanged - confirm whether it should be added.
    register("webster", "en", "Webster's revised unabridged dictionary (1913)");
  }

  /**
   * Creates a lexicon with the given name, source language and description and
   * stores it in the registry under its name.
   */
  private void register(String name, String sourceLanguage, String description) {
    Lexicon lexicon = new Lexicon(name, sourceLanguage);
    lexicon.setDescription(description);
    lexica.put(name, lexicon);
  }

  /**
   * Returns all registered lexicons whose source language matches the given language.
   *
   * @param lang language name or code; it is mapped through
   *     {@code Language.getInstance().getLanguageId(lang)} before comparison
   * @return the matching lexicons in the map's iteration order (unspecified for a
   *     {@code HashMap}), or {@code null} if no lexicon matches
   */
  public ArrayList<Lexicon> getLexicons(String lang) {
    String language = Language.getInstance().getLanguageId(lang);
    ArrayList<Lexicon> retLexicons = null;
    for (Lexicon lexicon : lexica.values()) {
      String sourceLanguage = lexicon.getSourceLanguage();
      if (sourceLanguage != null && sourceLanguage.equals(language)) {
        // The result list is created lazily so that "no match" stays null.
        if (retLexicons == null) {
          retLexicons = new ArrayList<Lexicon>();
        }
        retLexicons.add(lexicon);
      }
    }
    return retLexicons;
  }

  /**
   * Returns all registered lexicons.
   *
   * @return a new list of every lexicon in the map's iteration order (unspecified
   *     for a {@code HashMap}), or {@code null} if the registry is empty
   */
  public ArrayList<Lexicon> getLexicons() {
    if (lexica.isEmpty()) {
      return null;
    }
    return new ArrayList<Lexicon>(lexica.values());
  }

  /**
   * Returns the lexicons registered as "autenrieth", "bonitz" and "lsj", in that order.
   *
   * <p>Presumably these are the lexicons whose entries are keyed in Betacode - verify
   * against the callers.
   *
   * @return a new three-element list; an element is {@code null} if the
   *     corresponding lexicon is not registered
   */
  public ArrayList<Lexicon> getBetacodeLexicons() {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    retLexicons.add(lexica.get("autenrieth"));
    retLexicons.add(lexica.get("bonitz"));
    retLexicons.add(lexica.get("lsj"));
    return retLexicons;
  }

  /**
   * Returns the lexicon registered as "salmone".
   *
   * <p>Presumably this is the lexicon whose entries are keyed in Buckwalter
   * transliteration - verify against the callers.
   *
   * @return a new one-element list; the element is {@code null} if the lexicon
   *     is not registered
   */
  public ArrayList<Lexicon> getBuckwalterLexicons() {
    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
    retLexicons.add(lexica.get("salmone"));
    return retLexicons;
  }

}

/*  TODO
<option value="dwds">Das Digitale Wörterbuch der deutschen Sprache</option>
<option value="grimm">Deutsches Wörterbuch von J. und W. Grimm (experimental)</option>
<option value="artfl">Dictionnaire de l'Académie francaise, 4e éd. (1762)</option>
<option value="epsd">Pennsylvania Sumerian Dictionary</option>

  else if (dictname == "dwds") lang="de";   
  else if (dictname == "grimm") lang="de";  
  else if (dictname == "artfl") lang="fr"; 
  else if (dictname == "epsd") lang="sux";

DWDS:
  
Link: http://www.dwds.de/?woerterbuch=1&qu=auto   
Logo: http://www.dwds.de/images/dwds_logo.gif  
Copyright: Copyright &copy; by Berlin-Brandenburgische Akademie der Wissenschaften, Wörterbuch der deutschen Gegenwartssprache, all rights reserved.

Grimm:

Link: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/report_lemma?wb=G&word=auto  
View: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/selectarticles?lemid=  
Output:
<html>
<head>
<title>Deutsches Wörterbuch von Jacob und Wilhelm Grimm</title>
<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_wbb" type="text/css"></link>
<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_add" type="text/css"></link>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"></META>
</head>
<body>

ARTFL:

Name: Dictionnaire de l'Académie francaise, 4e éd.
Preprocessing of the word yourWord (decodes %XX escapes): $word =~ s/%([0-9A-F]{2})/pack("H2", $1)/ge;
Link: http://colet.uchicago.edu/cgi-bin/dico1look.pl?dicoid=ACAD1762&strippedhw=yourWord

EPSD:

Name: ePSD (Pennsylvania Sumerian Dictionary)
Link: http://psd.museum.upenn.edu/cgi-bin/epsd.plx?x=epsd&q=yourWord



 */