Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexica.java @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children | e845310098ba |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.dict.app; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.Set; import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; /* florio: 70091 records (6 of them are not xml valid) bonitz: 14648 records (46 of them are not xml valid) webster: 111733 records (3 of them are not xml valid) ls: 53500 records (14 of them are not xml valid) autenrieth: 10158 records (468 of them are not xml valid) cooper: 33124 records (116 of them are not xml valid) baretti: 53555 records (0 of them are not xml valid) salmone: 6360 records (11 of them are not xml valid) lsj: 112631 records (26922 of them are not xml valid) */ public class Lexica { private static Lexica instance; private static HashMap<String, Lexicon> localLexica = new HashMap<String, Lexicon>(); private static HashMap<String, Lexicon> remoteLexica = new HashMap<String, Lexicon>(); public static Lexica getInstance() { if (instance == null) { instance = new Lexica(); instance.init(); } return instance; } private void init() { Lexicon autenrieth = new Lexicon("autenrieth", "el"); autenrieth.setDescription("Autenrieth, a Homeric lexicon"); autenrieth.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0073:entry="); Lexicon baretti = new Lexicon("baretti", "it"); baretti.setDescription("Baretti, a dictionary of the English and Italian languages"); Lexicon bonitz = new Lexicon("bonitz", "el"); bonitz.setDescription("Bonitz, index Aristotelicus"); Lexicon cooper = new Lexicon("cooper", "la"); cooper.setDescription("Cooper, Thesaurus Linguae Romanae et Brittanicae"); Lexicon florio = new Lexicon("florio", "it"); florio.setDescription("Florio, a worlde of wordes, or most copious, dictionarie in Italian and English"); Lexicon ls = new Lexicon("ls", "la"); ls.setDescription("Lewis and Short, Latin dictionary"); ls.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0059:entry="); Lexicon lsj = new Lexicon("lsj", "el"); lsj.setDescription("Liddell-Scott-Jones, a Greek-English lexicon"); lsj.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry="); Lexicon salmone = new Lexicon("salmone", "ar"); salmone.setDescription("Salmone, an advanced learner's Arabic-English dictionary"); salmone.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:2002.02.0005:entry="); Lexicon salmoneUnicode = new Lexicon("salmoneUnicode", "ar"); salmoneUnicode.setDescription("Salmone, an advanced learner's Arabic-English dictionary"); Lexicon webster = new Lexicon("webster", "en"); webster.setDescription("Webster's revised unabridged dictionary (1913)"); localLexica.put("autenrieth", autenrieth); localLexica.put("baretti", baretti); localLexica.put("bonitz", bonitz); localLexica.put("cooper", cooper); localLexica.put("florio", florio); localLexica.put("ls", ls); localLexica.put("lsj", lsj); localLexica.put("salmone", salmone); localLexica.put("webster", webster); Lexicon dwds = new Lexicon("dwds", "de"); dwds.setDescription("Deutsches Wšrterbuch der deutschen Sprache"); dwds.setQueryUrl("http://www.dwds.de/search/?qu="); dwds.setType("remote"); Lexicon slater = new Lexicon("slater", "el"); slater.setDescription("William J. Slater, Lexicon to Pindar"); slater.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0072:entry="); slater.setType("remote"); Lexicon artflFr = new Lexicon("artfl-fr", "fr"); artflFr.setDescription("The ARTFL project: Dictionnaires d'autrefois: French dictionaries of the 17th, 18th, 19th and 20th centuries"); artflFr.setQueryUrl("http://machaut.uchicago.edu/?resource=frengdict&action=search&french="); artflFr.setType("remote"); Lexicon artflFrEn = new Lexicon("artfl-fr-en", "fr"); artflFrEn.setDescription("The ARTFL project: French - English dictionary"); artflFrEn.setQueryUrl("http://artflx.uchicago.edu/cgi-bin/dicos/pubdico1look.pl?strippedhw="); artflFrEn.setType("remote"); Lexicon lewis = new Lexicon("lewis", "la"); lewis.setDescription("Charlton T. Lewis, an Elementary Latin Dictionary"); lewis.setQueryUrl("http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0060:entry="); lewis.setType("remote"); Lexicon wikiwoordenboek = new Lexicon("wikiwoordenboek", "nl"); wikiwoordenboek.setDescription("Wiktionary: WikiWoordenboek"); wikiwoordenboek.setQueryUrl("http://nl.wiktionary.org/wiki/"); wikiwoordenboek.setType("remote"); Lexicon ctp = new Lexicon("ctp", "zh"); ctp.setDescription("Chinese Text Project"); ctp.setQueryUrl("http://ctext.org/dictionary.pl?if=en&char="); ctp.setType("remote"); Lexicon linyutan = new Lexicon("linyutan", "zh"); linyutan.setDescription("Lin Yutang"); linyutan.setQueryUrl("http://humanum.arts.cuhk.edu.hk/cgi-bin/agrep-lindict?query="); linyutan.setType("remote"); Lexicon chineseUnicode = new Lexicon("chinese-unicode", "zh"); chineseUnicode.setDescription("Unicode"); chineseUnicode.setQueryUrl("http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint="); chineseUnicode.setType("remote"); Lexicon chineseWiktionary = new Lexicon("chinese-wiktionary", "zh"); chineseWiktionary.setDescription("Wiktionary"); chineseWiktionary.setQueryUrl("http://en.wiktionary.org/wiki/"); chineseWiktionary.setType("remote"); remoteLexica.put("dwds", dwds); remoteLexica.put("slater", slater); remoteLexica.put("artfl-fr", artflFr); remoteLexica.put("artfl-fr-en", artflFrEn); remoteLexica.put("lewis", lewis); remoteLexica.put("wikiwoordenboek", wikiwoordenboek); remoteLexica.put("ctp", ctp); remoteLexica.put("linyutan", linyutan); remoteLexica.put("chinese-unicode", chineseUnicode); remoteLexica.put("chinese-wiktionary", chineseWiktionary); } public Lexicon getLexicon(String name) { Lexicon lexicon = localLexica.get(name); if (lexicon == null) lexicon = remoteLexica.get(name); return lexicon; } public ArrayList<Lexicon> getLocalLexicons(String lang) { String language = Language.getInstance().getLanguageId(lang); ArrayList<Lexicon> retLexicons = null; Set<String> keys = localLexica.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String lexName = it.next(); Lexicon lexicon = localLexica.get(lexName); String sourceLanguage = lexicon.getSourceLanguage(); if (sourceLanguage != null && sourceLanguage.equals(language)) { if (retLexicons == null) retLexicons = new ArrayList<Lexicon>(); retLexicons.add(lexicon); } } return retLexicons; } public ArrayList<Lexicon> getRemoteLexicons(String lang) { String language = Language.getInstance().getLanguageId(lang); ArrayList<Lexicon> retLexicons = null; Set<String> keys = remoteLexica.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String lexName = it.next(); Lexicon lexicon = remoteLexica.get(lexName); String sourceLanguage = lexicon.getSourceLanguage(); if (sourceLanguage != null && sourceLanguage.equals(language)) { if (retLexicons == null) retLexicons = new ArrayList<Lexicon>(); retLexicons.add(lexicon); } } return retLexicons; } public ArrayList<Lexicon> getLexicons(String lang) { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); ArrayList<Lexicon> localLexicons = getLocalLexicons(lang); if (localLexicons != null) { retLexicons.addAll(localLexicons); } ArrayList<Lexicon> remoteLexicons = getRemoteLexicons(lang); if (remoteLexicons != null) { retLexicons.addAll(remoteLexicons); } return retLexicons; } public ArrayList<Lexicon> getLocalLexicons() { ArrayList<Lexicon> retLexicons = null; Set<String> keys = localLexica.keySet(); Iterator<String> it = keys.iterator(); while (it.hasNext()) { String lexName = it.next(); Lexicon lexicon = localLexica.get(lexName); if (retLexicons == null) retLexicons = new ArrayList<Lexicon>(); retLexicons.add(lexicon); } return retLexicons; } public ArrayList<Lexicon> getLocalBetacodeLexicons() { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); retLexicons.add(localLexica.get("autenrieth")); retLexicons.add(localLexica.get("bonitz")); retLexicons.add(localLexica.get("lsj")); return retLexicons; } public ArrayList<Lexicon> getLocalBuckwalterLexicons() { ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>(); retLexicons.add(localLexica.get("salmone")); return retLexicons; } } /* TODO <option value="dwds">Das Digitale Wörterbuch der deutschen Sprache</option> <option value="grimm">Deutsches Wörterbuch von J. und W. Grimm (experimental)</option> <option value="artfl">Dictionnaire de l'Académie francaise, 4e éd. (1762)</option> <option value="epsd">Pennsylvania Sumerian Dictionary</option> else if (dictname == "dwds") lang="de"; else if (dictname == "grimm") lang="de"; else if (dictname == "artfl") lang="fr"; else of (dictname == "epsd") lang="sux"; DWDS: Link: http://www.dwds.de/?woerterbuch=1&qu=auto Logo: http://www.dwds.de/images/dwds_logo.gif Copyright: Copyright © by Berlin-Brandenburgische Akademie der Wissenschaften, Wörterbuch der deutschen Gegenwartssprache, all rights reserved. Grimm: Link: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/report_lemma?wb=G&word=auto View: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/selectarticles?lemid= Output: <html> <head> <title>Deutsches Wörterbuch von Jacob und Wilhelm Grimm</title> <link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_wbb" type="text/css"></link> <link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_add" type="text/css"></link> <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"></META> </head> <body> ARTFL: Name: Dictionnaire de l'Académie francaise, 4e éd. Vorverarbeitung des Wortes yourWord: $word =~ s/%([0-9A-F]{2})/pack("H2", $1)/ge; Link: http://colet.uchicago.edu/cgi-bin/dico1look.pl?dicoid=ACAD1762&strippedhw=yourWord EPSD: Name: ePSD (Pennsylvania Sumerian Dictionary) Link: http://psd.museum.upenn.edu/cgi-bin/epsd.plx?x=epsd&q=yourWord */