Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexica.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | 4a3641ae14d2 |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.dict.app; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Set; import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; /* florio: 70091 records (6 of them are not xml valid) bonitz: 14648 records (46 of them are not xml valid) webster: 111733 records (3 of them are not xml valid) ls: 53500 records (14 of them are not xml valid) autenrieth: 10158 records (468 of them are not xml valid) cooper: 33124 records (116 of them are not xml valid) baretti: 53555 records (0 of them are not xml valid) salmone: 6360 records (11 of them are not xml valid) lsj: 112631 records (26922 of them are not xml valid) */ public class Lexica { private static Lexica instance; private static HashMap<String, Lexicon> localLexica = new HashMap<String, Lexicon>(); private static HashMap<String, String[]> localLangLexiconNames = new HashMap<String, String[]>(); private static HashMap<String, Lexicon> remoteLexica = new HashMap<String, Lexicon>(); public static Lexica getInstance() { if (instance == null) { instance = new Lexica(); instance.init(); } return instance; } private void init() { Lexicon autenrieth = new Lexicon("autenrieth", "el"); autenrieth.setDescription("Autenrieth: A Homeric lexicon"); autenrieth.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0073:entry="); Lexicon baretti = new Lexicon("baretti", "it"); baretti.setDescription("Baretti: A dictionary of the English and Italian languages"); Lexicon bonitz = new Lexicon("bonitz", "el"); bonitz.setDescription("Bonitz: Index Aristotelicus"); Lexicon cooper = new Lexicon("cooper", "la"); cooper.setDescription("Cooper: Thesaurus Linguae Romanae et Brittanicae"); Lexicon florio = new Lexicon("florio", "it"); florio.setDescription("Florio: a worlde of wordes, or most copious, dictionarie in Italian and English"); Lexicon ls = new Lexicon("ls", "la"); ls.setDescription("Lewis and Short: Latin dictionary"); ls.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0059:entry="); Lexicon lsj = new Lexicon("lsj", "el"); lsj.setDescription("Liddell-Scott-Jones: A Greek-English lexicon"); lsj.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry="); Lexicon salmone = new Lexicon("salmone", "ar"); salmone.setDescription("Salmone: An advanced learner's Arabic-English dictionary"); salmone.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0005:entry="); Lexicon webster = new Lexicon("webster", "en"); webster.setDescription("Webster's revised unabridged dictionary (1913)"); localLexica.put("autenrieth", autenrieth); localLexica.put("baretti", baretti); localLexica.put("bonitz", bonitz); localLexica.put("cooper", cooper); localLexica.put("florio", florio); localLexica.put("ls", ls); localLexica.put("lsj", lsj); localLexica.put("salmone", salmone); localLexica.put("webster", webster); localLangLexiconNames = new HashMap<String, String[]>(); String[] arLangLexNames = {"salmone"}; localLangLexiconNames.put("ar", arLangLexNames); String[] elLangLexNames = {"autenrieth", "bonitz", "lsj"}; localLangLexiconNames.put("el", elLangLexNames); String[] enLangLexNames = {"webster"}; localLangLexiconNames.put("en", enLangLexNames); String[] itLangLexNames = {"baretti", "florio"}; localLangLexiconNames.put("it", itLangLexNames); String[] laLangLexNames = {"cooper", "ls"}; localLangLexiconNames.put("la", laLangLexNames); Lexicon dwds = new Lexicon("dwds", "de"); dwds.setDescription("DWDS: Deutsches Wšrterbuch der deutschen Sprache"); dwds.setQueryUrl("http://www.dwds.de/search/?qu="); dwds.setType("remote"); Lexicon leoDE = new Lexicon("leoDE", "de"); leoDE.setDescription("LEO: German - English Dictionary"); leoDE.setQueryUrl("http://dict.leo.org/ende?lang=en&search="); leoDE.setType("remote"); Lexicon dictLeipzigDE = new Lexicon("dictLeipzigDE", "de"); dictLeipzigDE.setDescription("UniversitŠt Leipzig: German - English Dictionary"); dictLeipzigDE.setQueryUrl("http://dict.uni-leipzig.de/index.php?wort="); dictLeipzigDE.setType("remote"); Lexicon lingueeDE = new Lexicon("lingueeDE", "de"); lingueeDE.setDescription("Linguee: German - English Dictionary"); lingueeDE.setQueryUrl("http://www.linguee.de/deutsch-englisch/search?query="); lingueeDE.setType("remote"); Lexicon slater = new Lexicon("slater", "el"); slater.setDescription("William J. Slater: Lexicon to Pindar"); slater.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0072:entry="); slater.setType("remote"); Lexicon leoEN = new Lexicon("leoEN", "en"); leoEN.setDescription("LEO: German - English Dictionary"); leoEN.setQueryUrl("http://dict.leo.org/ende?lang=en&search="); leoEN.setType("remote"); Lexicon lingueeEN = new Lexicon("lingueeEN", "en"); lingueeEN.setDescription("Linguee: German - English Dictionary"); lingueeEN.setQueryUrl("http://www.linguee.de/deutsch-englisch/search?query="); lingueeEN.setType("remote"); Lexicon leoFR = new Lexicon("leoFR", "fr"); leoFR.setDescription("LEO: German - French Dictionary"); leoFR.setQueryUrl("http://dict.leo.org/frde?lang=en&search="); leoFR.setType("remote"); Lexicon artflFr = new Lexicon("artfl-fr", "fr"); artflFr.setDescription("The ARTFL project: Dictionnaires d'autrefois: French dictionaries of the 17th, 18th, 19th and 20th centuries"); artflFr.setQueryUrl("http://machaut.uchicago.edu/?resource=frengdict&action=search&french="); artflFr.setType("remote"); Lexicon artflFrEn = new Lexicon("artfl-fr-en", "fr"); artflFrEn.setDescription("The ARTFL project: French - English dictionary"); artflFrEn.setQueryUrl("http://artflx.uchicago.edu/cgi-bin/dicos/pubdico1look.pl?strippedhw="); artflFrEn.setType("remote"); Lexicon leoIT = new Lexicon("leoIT", "it"); leoIT.setDescription("LEO: German - Italian Dictionary"); leoIT.setQueryUrl("http://dict.leo.org/itde?lang=en&search="); leoIT.setType("remote"); Lexicon lewis = new Lexicon("lewis", "la"); lewis.setDescription("Charlton T. Lewis: An Elementary Latin Dictionary"); lewis.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0060:entry="); lewis.setType("remote"); Lexicon wikiwoordenboek = new Lexicon("wikiwoordenboek", "nl"); wikiwoordenboek.setDescription("Wiktionary: WikiWoordenboek"); wikiwoordenboek.setQueryUrl("http://nl.wiktionary.org/wiki/"); wikiwoordenboek.setType("remote"); Lexicon leoCH = new Lexicon("leoCH", "ch"); leoCH.setDescription("LEO: German - Chinese Dictionary"); leoCH.setQueryUrl("http://dict.leo.org/chde?lang=en&search="); leoCH.setType("remote"); Lexicon ctp = new Lexicon("ctp", "zh"); ctp.setDescription("Chinese Text Project: Chinese - English Dictionary"); ctp.setQueryUrl("http://ctext.org/dictionary.pl?if=en&char="); ctp.setType("remote"); Lexicon linyutan = new Lexicon("linyutan", "zh"); linyutan.setDescription("Lin Yutang: Chinese - English Dictionary"); linyutan.setQueryUrl("http://humanum.arts.cuhk.edu.hk/cgi-bin/agrep-lindict?query="); linyutan.setType("remote"); Lexicon chineseUnicode = new Lexicon("chinese-unicode", "zh"); chineseUnicode.setDescription("Unicode.org: Chinese - English"); chineseUnicode.setQueryUrl("http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint="); chineseUnicode.setType("remote"); Lexicon chineseWiktionary = new Lexicon("chinese-wiktionary", "zh"); chineseWiktionary.setDescription("Wiktionary: Chinese - English"); chineseWiktionary.setQueryUrl("http://en.wiktionary.org/wiki/"); chineseWiktionary.setType("remote"); remoteLexica.put("dwds", dwds); remoteLexica.put("leoDE", leoDE); remoteLexica.put("dictLeipzigDE", dictLeipzigDE); remoteLexica.put("lingueeDE", lingueeDE); remoteLexica.put("slater", slater); remoteLexica.put("leoEN", leoEN); remoteLexica.put("lingueeEN", lingueeEN); remoteLexica.put("leoFR", leoFR); remoteLexica.put("artfl-fr", artflFr); remoteLexica.put("artfl-fr-en", artflFrEn); remoteLexica.put("leoIT", leoIT); remoteLexica.put("lewis", lewis); remoteLexica.put("wikiwoordenboek", wikiwoordenboek); remoteLexica.put("ctp", ctp); remoteLexica.put("leoCH", leoCH); remoteLexica.put("linyutan", linyutan); remoteLexica.put("chinese-unicode", chineseUnicode); remoteLexica.put("chinese-wiktionary", chineseWiktionary); } public Lexicon getLexicon(String name) { Lexicon lexicon = localLexica.get(name); if (lexicon == null) lexicon = remoteLexica.get(name); return lexicon; } public ArrayList<Lexicon> getLocalLexicons(String lang) { String language = Language.getInstance().getLanguageId(lang); ArrayList<Lexicon> retLexicons = null; Set<String> keys = localLexica.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String lexName = it.next(); Lexicon lexicon = localLexica.get(lexName); String sourceLanguage = lexicon.getSourceLanguage(); if (sourceLanguage != null && sourceLanguage.equals(language)) { if (retLexicons == null) retLexicons = new ArrayList<Lexicon>(); retLexicons.add(lexicon); } } return retLexicons; } public String[] getLocalLexiconNames(String lang) { String language = Language.getInstance().getLanguageId(lang); String[] retLexiconNames = localLangLexiconNames.get(language); return retLexiconNames; } public ArrayList<Lexicon> getRemoteLexicons(String lang) { String language = Language.getInstance().getLanguageId(lang); ArrayList<Lexicon> retLexicons = null; Set<String> keys = remoteLexica.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String lexName = it.next(); Lexicon lexicon = remoteLexica.get(lexName); String sourceLanguage = lexicon.getSourceLanguage(); if (sourceLanguage != null && sourceLanguage.equals(language)) { if (retLexicons == null) retLexicons = new ArrayList<Lexicon>(); retLexicons.add(lexicon); } } if (retLexicons != null) Collections.sort(retLexicons); return retLexicons; } public ArrayList<Lexicon> getLexicons(String lang) { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); ArrayList<Lexicon> localLexicons = getLocalLexicons(lang); if (localLexicons != null) { retLexicons.addAll(localLexicons); } ArrayList<Lexicon> remoteLexicons = getRemoteLexicons(lang); if (remoteLexicons != null) { retLexicons.addAll(remoteLexicons); } return retLexicons; } public ArrayList<Lexicon> getLocalLexicons() { ArrayList<Lexicon> retLexicons = null; Set<String> keys = localLexica.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String lexName = it.next(); Lexicon lexicon = localLexica.get(lexName); if (retLexicons == null) retLexicons = new ArrayList<Lexicon>(); retLexicons.add(lexicon); } return retLexicons; } public ArrayList<Lexicon> getLocalBetacodeLexicons() { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); retLexicons.add(localLexica.get("autenrieth")); retLexicons.add(localLexica.get("bonitz")); retLexicons.add(localLexica.get("lsj")); return retLexicons; } public ArrayList<Lexicon> getLocalBuckwalterLexicons() { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); retLexicons.add(localLexica.get("salmone")); return retLexicons; } } /* TODO <option value="dwds">Das Digitale Wörterbuch der deutschen Sprache</option> <option value="grimm">Deutsches Wörterbuch von J. und W. Grimm (experimental)</option> <option value="artfl">Dictionnaire de l'Académie francaise, 4e éd. (1762)</option> <option value="epsd">Pennsylvania Sumerian Dictionary</option> else if (dictname == "dwds") lang="de"; else if (dictname == "grimm") lang="de"; else if (dictname == "artfl") lang="fr"; else of (dictname == "epsd") lang="sux"; DWDS: Link: http://www.dwds.de/?woerterbuch=1&qu=auto Logo: http://www.dwds.de/images/dwds_logo.gif Copyright: Copyright © by Berlin-Brandenburgische Akademie der Wissenschaften, Wörterbuch der deutschen Gegenwartssprache, all rights reserved. Grimm: Link: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/report_lemma?wb=G&word=auto View: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/selectarticles?lemid= Output: <html> <head> <title>Deutsches Wörterbuch von Jacob und Wilhelm Grimm</title> <link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_wbb" type="text/css"></link> <link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_add" type="text/css"></link> <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"></META> </head> <body> ARTFL: Name: Dictionnaire de l'Académie francaise, 4e éd. Vorverarbeitung des Wortes yourWord: $word =~ s/%([0-9A-F]{2})/pack("H2", $1)/ge; Link: http://colet.uchicago.edu/cgi-bin/dico1look.pl?dicoid=ACAD1762&strippedhw=yourWord EPSD: Name: ePSD (Pennsylvania Sumerian Dictionary) Link: http://psd.museum.upenn.edu/cgi-bin/epsd.plx?x=epsd&q=yourWord */