Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriter.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.morph.db; | |
2 | |
3 import java.io.BufferedInputStream; | |
4 import java.io.File; | |
5 import java.io.FileInputStream; | |
6 import java.io.IOException; | |
7 import java.io.InputStream; | |
8 import java.util.ArrayList; | |
9 import java.util.Date; | |
10 | |
11 import org.xml.sax.InputSource; | |
12 import org.xml.sax.SAXException; | |
13 import org.xml.sax.XMLReader; | |
14 | |
15 import com.sun.org.apache.xerces.internal.parsers.SAXParser; | |
16 | |
17 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; | |
18 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; | |
19 import de.mpg.mpiwg.berlin.mpdl.lt.morph.db.DBMorphHandler; | |
20 import de.mpg.mpiwg.berlin.mpdl.util.Util; | |
21 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
22 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; | |
23 | |
24 public class DBMorphWriter { | |
25 private static DBMorphWriter instance; | |
26 private static String MPDL_DATA_DIR = MpdlConstants.MPDL_DATA_DIR; | |
27 private static String DB_DIR_DONATUS = MPDL_DATA_DIR + "/dataBerkeleyDB/donatus"; | |
28 private static String DATA_FILES_DIR = MPDL_DATA_DIR + "/dataFiles"; | |
29 private DBMorphHandler dbMorphHandler; | |
30 private Date beginOfOperation; | |
31 private Date endOfOperation; | |
32 | |
33 public static DBMorphWriter getInstance() throws ApplicationException { | |
34 if (instance == null) { | |
35 instance = new DBMorphWriter(); | |
36 instance.init(); | |
37 } | |
38 return instance; | |
39 } | |
40 | |
41 /** | |
42 * | |
43 */ | |
44 public static void main(String[] args) throws ApplicationException { | |
45 getInstance(); | |
46 instance.beginOperation(); | |
47 System.out.println("Start ..."); | |
48 instance.init(); | |
49 instance.openMorphData(); | |
50 // instance.deleteMorphData(); | |
51 long size = instance.getSize(); | |
52 System.out.println("Count forms: " + size); | |
53 // instance.writeMorphData(); | |
54 // instance.readSampleData(); | |
55 instance.end(); | |
56 instance.endOperation(); | |
57 Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation); | |
58 System.out.println("End."); | |
59 System.out.println("Needed time: " + elapsedTime + " seconds"); | |
60 } | |
61 | |
62 private void init() throws ApplicationException { | |
63 dbMorphHandler = new DBMorphHandler(DB_DIR_DONATUS); | |
64 dbMorphHandler.start(); | |
65 } | |
66 | |
67 private void openMorphData() throws ApplicationException { | |
68 dbMorphHandler.openDatabases(); | |
69 } | |
70 | |
71 private void deleteMorphData() throws ApplicationException { | |
72 dbMorphHandler.deleteMorphData(); | |
73 } | |
74 | |
75 private void writeMorphData() throws ApplicationException { | |
76 String inputFileNameLatin = DATA_FILES_DIR + "/" + "perseus-latin-forms.xml"; | |
77 instance.write(inputFileNameLatin); | |
78 String inputFileNameGreek = DATA_FILES_DIR + "/" + "perseus-greek-forms.xml"; | |
79 instance.write(inputFileNameGreek); | |
80 String inputFileNameArabic = DATA_FILES_DIR + "/" + "perseus-arabic-forms.xml"; | |
81 instance.write(inputFileNameArabic); | |
82 String inputFileNameDutch = DATA_FILES_DIR + "/" + "celex-dutch-forms.xml"; | |
83 instance.write(inputFileNameDutch); | |
84 String inputFileNameGerman = DATA_FILES_DIR + "/" + "celex-german-forms.xml"; | |
85 instance.write(inputFileNameGerman); | |
86 String inputFileNameEnglish = DATA_FILES_DIR + "/" + "celex-english-forms.xml"; | |
87 instance.write(inputFileNameEnglish); | |
88 String inputFileNameFrench = DATA_FILES_DIR + "/" + "lexique-french-forms.xml"; | |
89 instance.write(inputFileNameFrench); | |
90 String inputFileNameItalian = DATA_FILES_DIR + "/" + "donatus-italian-forms.xml"; | |
91 instance.write(inputFileNameItalian); | |
92 String[] languages = {"ar", "de", "en", "el", "fr", "it", "la"}; | |
93 for (int i = 0; i < languages.length; i++) { | |
94 String language = languages[i]; | |
95 String inputFileNameDonatusSup = DATA_FILES_DIR + "/" + "donatus-sup-" + language + "-forms.xml"; | |
96 instance.write(inputFileNameDonatusSup); | |
97 } | |
98 String[] donatusAdditionalSups = {"cache-la", "cache-el", "cache-it"}; | |
99 for (int i = 0; i < donatusAdditionalSups.length; i++) { | |
100 String donatusAdditionalSupName = donatusAdditionalSups[i]; | |
101 String inputFileNameDonatusAddSup = DATA_FILES_DIR + "/donatusAdditionalSup/" + "donatus-sup-" + donatusAdditionalSupName + ".xml"; | |
102 instance.write(inputFileNameDonatusAddSup); | |
103 } | |
104 } | |
105 | |
106 private void write(String inputFileName) throws ApplicationException { | |
107 File inputFile = new File(inputFileName); | |
108 if (! inputFile.exists()) { | |
109 System.out.println("Input file: " + inputFile.getAbsolutePath() + " does not exist."); | |
110 return; | |
111 } | |
112 DBMorphWriterContentHandler morphContentHandler = new DBMorphWriterContentHandler(dbMorphHandler); | |
113 try { | |
114 XMLReader xmlParser = new SAXParser(); | |
115 xmlParser.setContentHandler(morphContentHandler); | |
116 InputStream inputStream = new FileInputStream(inputFile); | |
117 BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream); | |
118 InputSource input = new InputSource(bufferedInputStream); | |
119 xmlParser.parse(input); | |
120 bufferedInputStream.close(); | |
121 } catch (SAXException e) { | |
122 throw new ApplicationException(e); | |
123 } catch (IOException e) { | |
124 throw new ApplicationException(e); | |
125 } | |
126 } | |
127 | |
128 private long getSize() throws ApplicationException { | |
129 long size = dbMorphHandler.getSize(); | |
130 return size; | |
131 } | |
132 | |
133 private void addSampleData() throws ApplicationException { | |
134 Lemma l1 = new Lemma("perseus", "la", "abrogo"); | |
135 Form f1 = new Form("perseus", "la", "abrogare"); | |
136 Form f2 = new Form("perseus", "la", "abroges"); | |
137 dbMorphHandler.writeFormLemma(f1, l1); | |
138 dbMorphHandler.writeLemmaForm(l1, f1); | |
139 dbMorphHandler.writeLemmaForm(l1, f2); | |
140 } | |
141 | |
142 private void readSampleData() throws ApplicationException { | |
143 ArrayList<Form> forms = dbMorphHandler.readForms("la", "abrogo"); | |
144 System.out.println("Forms: " + forms); | |
145 } | |
146 | |
147 private void deleteSampleData() throws ApplicationException { | |
148 Lemma l1 = new Lemma("perseus", "la", "abrogo"); | |
149 Form f1 = new Form("perseus", "la", "abrogare"); | |
150 Form f2 = new Form("perseus", "la", "abroges"); | |
151 dbMorphHandler.deleteLemma(l1); | |
152 dbMorphHandler.deleteForm(f1); | |
153 dbMorphHandler.deleteForm(f2); | |
154 } | |
155 | |
156 private void end() throws ApplicationException { | |
157 dbMorphHandler.closeDatabases(); | |
158 } | |
159 | |
160 private void beginOperation() { | |
161 beginOfOperation = new Date(); | |
162 } | |
163 | |
164 private void endOperation() { | |
165 endOfOperation = new Date(); | |
166 } | |
167 | |
168 } |