Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children | fba5577e49d9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.morph.converter; | |
2 | |
3 import java.io.BufferedInputStream; | |
4 import java.io.BufferedOutputStream; | |
5 import java.io.BufferedReader; | |
6 import java.io.File; | |
7 import java.io.FileInputStream; | |
8 import java.io.FileNotFoundException; | |
9 import java.io.FileOutputStream; | |
10 import java.io.FileReader; | |
11 import java.io.IOException; | |
12 import java.io.InputStream; | |
13 import java.util.Date; | |
14 import java.util.Hashtable; | |
15 | |
16 import org.xml.sax.InputSource; | |
17 import org.xml.sax.SAXException; | |
18 import org.xml.sax.XMLReader; | |
19 | |
20 import com.sun.org.apache.xerces.internal.parsers.SAXParser; | |
21 | |
22 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
23 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; | |
24 import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder; | |
25 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; | |
26 import de.mpg.mpiwg.berlin.mpdl.util.Util; | |
27 | |
28 public class Converter { | |
29 private static Converter instance; | |
30 private static String MPDL_DATA_DIR = MpdlConstants.MPDL_DATA_DIR; | |
31 private static String ORIG_PERSEUS_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/perseus"; | |
32 private static String ORIG_CELEX_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/celex"; | |
33 private static String ORIG_FRENCH_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/french"; | |
34 private static String ORIG_ITALIAN_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/italian"; | |
35 private static String ORIG_DONATUS_SUB_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/donatus-sup"; | |
36 private static String OUT_DATA_DIR = MPDL_DATA_DIR + "/dataFiles"; | |
37 private PerseusContentHandler perseusContentHandler; | |
38 private Hashtable<String, Hashtable<String, Form>> forms = new Hashtable<String, Hashtable<String, Form>>(); | |
39 private Date beginOfOperation; | |
40 private Date endOfOperation; | |
41 | |
42 public static Converter getInstance() throws ApplicationException { | |
43 if (instance == null) { | |
44 instance = new Converter(); | |
45 } | |
46 return instance; | |
47 } | |
48 | |
49 /** | |
50 * | |
51 */ | |
52 public static void main(String[] args) throws ApplicationException { | |
53 getInstance(); | |
54 instance.beginOperation(); | |
55 System.out.print("Start ..."); | |
56 /* | |
57 // Latin | |
58 String inputFileNameLatin = ORIG_PERSEUS_DATA_DIR + "/" + "latin.morph.xml"; | |
59 String outputFileNameLatin = OUT_DATA_DIR + "/" + "perseus-latin-forms.xml"; | |
60 instance.perseusConvert("perseus", "la", inputFileNameLatin, outputFileNameLatin); | |
61 String inputFileNameDonatusLatinSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-la-forms.csv"; | |
62 String outputFileNameDonatusLatinSup = OUT_DATA_DIR + "/" + "donatus-sup-la-forms.xml"; | |
63 instance.donatusSupplementsConvert("donatus-sup", "la", inputFileNameDonatusLatinSup, outputFileNameDonatusLatinSup); | |
64 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
65 // Greek | |
66 String inputFileNameGreek = ORIG_PERSEUS_DATA_DIR + "/" + "greek.morph.xml"; | |
67 String outputFileNameGreek = OUT_DATA_DIR + "/" + "perseus-greek-forms.xml"; | |
68 instance.perseusConvert("perseus", "el", inputFileNameGreek, outputFileNameGreek); | |
69 String inputFileNameDonatusGreekSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-el-forms.csv"; | |
70 String outputFileNameDonatusGreekSup = OUT_DATA_DIR + "/" + "donatus-sup-el-forms.xml"; | |
71 instance.donatusSupplementsConvert("donatus-sup", "el", inputFileNameDonatusGreekSup, outputFileNameDonatusGreekSup); | |
72 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
73 // Arabic | |
74 String inputFileNameArabic = ORIG_PERSEUS_DATA_DIR + "/" + "arabic.morph.xml"; | |
75 String outputFileNameArabic = OUT_DATA_DIR + "/" + "perseus-arabic-forms.xml"; | |
76 instance.perseusConvert("perseus", "ar", inputFileNameArabic, outputFileNameArabic); | |
77 String inputFileNameDonatusArabicSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-ar-forms.csv"; | |
78 String outputFileNameDonatusArabicSup = OUT_DATA_DIR + "/" + "donatus-sup-ar-forms.xml"; | |
79 instance.donatusSupplementsConvert("donatus-sup", "ar", inputFileNameDonatusArabicSup, outputFileNameDonatusArabicSup); | |
80 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
81 // Dutch | |
82 String inputFileNameDutchWords = ORIG_CELEX_DATA_DIR + "/" + "dmw.cd"; | |
83 String inputFileNameDutchLemmas = ORIG_CELEX_DATA_DIR + "/" + "dml.cd"; | |
84 String outputFileNameDutch = OUT_DATA_DIR + "/" + "celex-dutch-forms.xml"; | |
85 instance.celexConvert("celex", "nl", inputFileNameDutchWords, inputFileNameDutchLemmas, outputFileNameDutch); | |
86 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
87 // German | |
88 String inputFileNameGermanWords = ORIG_CELEX_DATA_DIR + "/" + "gmw.cd"; | |
89 String inputFileNameGermanLemmas = ORIG_CELEX_DATA_DIR + "/" + "gml.cd"; | |
90 String outputFileNameGerman = OUT_DATA_DIR + "/" + "celex-german-forms.xml"; | |
91 instance.celexConvert("celex", "de", inputFileNameGermanWords, inputFileNameGermanLemmas, outputFileNameGerman); | |
92 String inputFileNameDonatusGermanSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-de-forms.csv"; | |
93 String outputFileNameDonatusGermanSup = OUT_DATA_DIR + "/" + "donatus-sup-de-forms.xml"; | |
94 instance.donatusSupplementsConvert("donatus-sup", "de", inputFileNameDonatusGermanSup, outputFileNameDonatusGermanSup); | |
95 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
96 // English | |
97 String inputFileNameEnglishWords = ORIG_CELEX_DATA_DIR + "/" + "emw.cd"; | |
98 String inputFileNameEnglishLemmas = ORIG_CELEX_DATA_DIR + "/" + "eml.cd"; | |
99 String outputFileNameEnglish = OUT_DATA_DIR + "/" + "celex-english-forms.xml"; | |
100 instance.celexConvert("celex", "en", inputFileNameEnglishWords, inputFileNameEnglishLemmas, outputFileNameEnglish); | |
101 String inputFileNameDonatusEnglishSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-en-forms.csv"; | |
102 String outputFileNameDonatusEnglishSup = OUT_DATA_DIR + "/" + "donatus-sup-en-forms.xml"; | |
103 instance.donatusSupplementsConvert("donatus-sup", "en", inputFileNameDonatusEnglishSup, outputFileNameDonatusEnglishSup); | |
104 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
105 // French | |
106 String inputFileNameFrench = ORIG_FRENCH_DATA_DIR + "/" + "lexique"; | |
107 String outputFileNameFrench = OUT_DATA_DIR + "/" + "lexique-french-forms.xml"; | |
108 instance.lexiqueConvert("lexique", "fr", inputFileNameFrench, outputFileNameFrench); | |
109 String inputFileNameDonatusFrenchSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-fr-forms.csv"; | |
110 String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml"; | |
111 instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup); | |
112 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
113 */ | |
114 // Italian | |
115 String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash"; | |
116 String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml"; | |
117 instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian); | |
118 /* | |
119 String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv"; | |
120 String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml"; | |
121 instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup); | |
122 */ | |
123 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | |
124 | |
125 instance.end(); | |
126 instance.endOperation(); | |
127 Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation); | |
128 System.out.println("End."); | |
129 System.out.println("Needed time: " + elapsedTime + " seconds"); | |
130 } | |
131 | |
132 private void perseusConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException { | |
133 File inputFile = new File(inputFileName); | |
134 perseusContentHandler = new PerseusContentHandler(provider, language, outputFileName); | |
135 try { | |
136 XMLReader xmlParser = new SAXParser(); | |
137 xmlParser.setContentHandler(perseusContentHandler); | |
138 InputStream inputStream = new FileInputStream(inputFile); | |
139 BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream); | |
140 InputSource input = new InputSource(bufferedInputStream); | |
141 xmlParser.parse(input); | |
142 bufferedInputStream.close(); | |
143 forms = perseusContentHandler.getForms(); | |
144 } catch (SAXException e) { | |
145 throw new ApplicationException(e); | |
146 } catch (IOException e) { | |
147 throw new ApplicationException(e); | |
148 } | |
149 } | |
150 | |
151 private void celexConvert(String provider, String language, String inputFileNameWords, String inputFileNameLemmas, String outputFileName) throws ApplicationException { | |
152 File inputFileLemmas = new File(inputFileNameLemmas); | |
153 Hashtable<Integer, String> lemmas = loadLemmas(inputFileLemmas); | |
154 File inputFileWords = new File(inputFileNameWords); | |
155 File outputFile = new File(outputFileName); | |
156 writeCelexForms(provider, language, lemmas, inputFileWords, outputFile); | |
157 } | |
158 | |
159 private void lexiqueConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException { | |
160 File inputFile = new File(inputFileName); | |
161 File outputFile = new File(outputFileName); | |
162 writeLexiqueForms(provider, language, inputFile, outputFile); | |
163 } | |
164 | |
165 private void donatusItalianConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException { | |
166 File inputFile = new File(inputFileName); | |
167 File outputFile = new File(outputFileName); | |
168 writeDonatusItalianForms(provider, language, inputFile, outputFile); | |
169 } | |
170 | |
171 private void donatusSupplementsConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException { | |
172 File inputFile = new File(inputFileName); | |
173 File outputFile = new File(outputFileName); | |
174 writeDonatusSupplementsForms(provider, language, inputFile, outputFile); | |
175 } | |
176 | |
177 private Hashtable<Integer, String> loadLemmas(File inputFile) { | |
178 Hashtable<Integer, String> retLemmas = new Hashtable<Integer, String>(); | |
179 BufferedReader in = null; | |
180 try { | |
181 in = new BufferedReader(new FileReader(inputFile)); | |
182 String line = null; | |
183 while((line = in.readLine()) != null) { | |
184 int from = line.indexOf("\\"); | |
185 int to = line.indexOf("\\", from + 1); | |
186 String idStr = line.substring(0, from); | |
187 Integer idInt = new Integer(idStr); | |
188 String lemma = line.substring(from + 1, to); | |
189 retLemmas.put(idInt, lemma); | |
190 } | |
191 } catch (FileNotFoundException e) { | |
192 e.printStackTrace(); | |
193 } catch (IOException e) { | |
194 e.printStackTrace(); | |
195 } finally { | |
196 // always close the stream | |
197 if (in != null) try { in.close(); } catch (Exception e) { } | |
198 } | |
199 return retLemmas; | |
200 } | |
201 | |
202 private void writeCelexForms(String provider, String language, Hashtable<Integer, String> lemmas, File inputFileWords, File outputFile) throws ApplicationException { | |
203 BufferedReader in = null; | |
204 BufferedOutputStream out = null; | |
205 forms = new Hashtable<String, Hashtable<String, Form>>(); | |
206 try { | |
207 in = new BufferedReader(new FileReader(inputFileWords)); | |
208 out = new BufferedOutputStream(new FileOutputStream(outputFile)); | |
209 write("<forms>\n", out); | |
210 String line = null; | |
211 while((line = in.readLine()) != null) { | |
212 int delim1 = line.indexOf("\\"); | |
213 int delim2 = line.indexOf("\\", delim1 + 1); | |
214 int delim3 = line.indexOf("\\", delim2 + 1); | |
215 int delim4 = line.indexOf("\\", delim3 + 1); | |
216 String formName = line.substring(delim1 + 1, delim2); | |
217 String lemmaIdStr = line.substring(delim3 + 1, delim4); | |
218 Integer lemmaIdInt = null; | |
219 try { | |
220 lemmaIdInt = new Integer(lemmaIdStr); | |
221 } catch (NumberFormatException e) { | |
222 System.out.println("Warning: Lemma id: " + lemmaIdStr + " is not correct"); | |
223 } | |
224 if (lemmaIdInt != null) { | |
225 String lemmaName = lemmas.get(lemmaIdInt); | |
226 Form form = new Form(); | |
227 form.setProvider(provider); | |
228 form.setLanguage(language); | |
229 form.setFormName(formName); | |
230 form.setLemmaName(lemmaName); | |
231 form.normalize(); | |
232 if (form.isOk()) { | |
233 Hashtable<String, Form> formLemmas = forms.get(formName); | |
234 if (formLemmas == null) { | |
235 formLemmas = new Hashtable<String, Form>(); | |
236 formLemmas.put(lemmaName, form); | |
237 forms.put(formName, formLemmas); | |
238 write(form, out); | |
239 } else { | |
240 Form formLemma = formLemmas.get(lemmaName); | |
241 if (formLemma == null) { | |
242 formLemmas.put(lemmaName, form); | |
243 write(form, out); | |
244 } | |
245 } | |
246 } | |
247 } | |
248 } | |
249 write("</forms>\n", out); | |
250 } catch (FileNotFoundException e) { | |
251 throw new ApplicationException(e); | |
252 } catch (IOException e) { | |
253 throw new ApplicationException(e); | |
254 } finally { | |
255 // always close the stream | |
256 if (in != null) try { in.close(); } catch (Exception e) { } | |
257 if (out != null) try { out.close(); } catch (Exception e) { } | |
258 } | |
259 } | |
260 | |
261 private void writeLexiqueForms(String provider, String language, File inputFile, File outputFile) throws ApplicationException { | |
262 BufferedReader in = null; | |
263 BufferedOutputStream out = null; | |
264 forms = new Hashtable<String, Hashtable<String, Form>>(); | |
265 try { | |
266 in = new BufferedReader(new FileReader(inputFile)); | |
267 out = new BufferedOutputStream(new FileOutputStream(outputFile)); | |
268 write("<forms>\n", out); | |
269 String line = null; | |
270 while((line = in.readLine()) != null) { | |
271 int delim1 = line.indexOf("\t"); | |
272 int delim2 = line.indexOf("\t", delim1 + 1); | |
273 String formName = line.substring(0, delim1).trim(); | |
274 String lemmaName = line.substring(delim1 + 1, delim2).trim(); | |
275 if (lemmaName.equals("=")) | |
276 lemmaName = formName; | |
277 Form form = new Form(); | |
278 form.setProvider(provider); | |
279 form.setLanguage(language); | |
280 form.setFormName(formName); | |
281 form.setLemmaName(lemmaName); | |
282 form.normalize(); | |
283 if (form.isOk()) { | |
284 Hashtable<String, Form> formLemmas = forms.get(formName); | |
285 if (formLemmas == null) { | |
286 formLemmas = new Hashtable<String, Form>(); | |
287 formLemmas.put(lemmaName, form); | |
288 forms.put(formName, formLemmas); | |
289 write(form, out); | |
290 } else { | |
291 Form formLemma = formLemmas.get(lemmaName); | |
292 if (formLemma == null) { | |
293 formLemmas.put(lemmaName, form); | |
294 write(form, out); | |
295 } | |
296 } | |
297 } | |
298 } | |
299 write("</forms>\n", out); | |
300 } catch (FileNotFoundException e) { | |
301 throw new ApplicationException(e); | |
302 } catch (IOException e) { | |
303 throw new ApplicationException(e); | |
304 } finally { | |
305 // always close the stream | |
306 if (in != null) try { in.close(); } catch (Exception e) { } | |
307 if (out != null) try { out.close(); } catch (Exception e) { } | |
308 } | |
309 } | |
310 | |
311 private void writeDonatusItalianForms(String provider, String language, File inputFile, File outputFile) throws ApplicationException { | |
312 BufferedReader in = null; | |
313 BufferedOutputStream out = null; | |
314 forms = new Hashtable<String, Hashtable<String, Form>>(); | |
315 try { | |
316 in = new BufferedReader(new FileReader(inputFile)); | |
317 out = new BufferedOutputStream(new FileOutputStream(outputFile)); | |
318 write("<forms>\n", out); | |
319 String line = null; | |
320 while((line = in.readLine()) != null) { | |
321 // one line is of the form: 'risoluino' => '<NL>V risolvino,risolvere pres imperat 3rd pl ...</NL><NL>...</NL>', | |
322 // or of the form: 'legamenti' => '<NL>N legamento masc pl ...</NL><NL>...</NL>', | |
323 // this method only recognize the first lemma TODO recognize all lemmas for the form | |
324 int delim1 = line.indexOf("'"); | |
325 int delim2 = line.indexOf("'", delim1 + 1); | |
326 int delim3 = line.indexOf("'", delim2 + 1); | |
327 int delim4 = delim3 + 6; // beginning of the lemma | |
328 int delim5 = line.indexOf(" ", delim4 + 1); // end of the first lemma(s) is separated by a blank | |
329 String formName = line.substring(delim1 + 1, delim2); | |
330 formName = formName.replace("\\", ""); | |
331 String lemmaName = line.substring(delim4 + 1, delim5); | |
332 int commaInLemma = lemmaName.indexOf(","); // when there are more than one lemma | |
333 if (commaInLemma != -1) | |
334 lemmaName = lemmaName.substring(0, commaInLemma); | |
335 lemmaName = lemmaName.replace("\\", ""); | |
336 Form form = new Form(); | |
337 form.setProvider(provider); | |
338 form.setLanguage(language); | |
339 form.setFormName(formName); | |
340 form.setLemmaName(lemmaName); | |
341 form.normalize(); | |
342 boolean lineContainsAp = line.contains("\''"); // some of the form lines contain irregular strings of the form: 'par\'' => '<NL>N pari/^,pari indeclform adverb</NL> | |
343 if (form.isOk() && ! lineContainsAp) { | |
344 Hashtable<String, Form> formLemmas = forms.get(formName); | |
345 if (formLemmas == null) { | |
346 formLemmas = new Hashtable<String, Form>(); | |
347 formLemmas.put(lemmaName, form); | |
348 forms.put(formName, formLemmas); | |
349 write(form, out); | |
350 } else { | |
351 Form formLemma = formLemmas.get(lemmaName); | |
352 if (formLemma == null) { | |
353 formLemmas.put(lemmaName, form); | |
354 write(form, out); | |
355 } | |
356 } | |
357 } | |
358 } | |
359 write("</forms>\n", out); | |
360 } catch (FileNotFoundException e) { | |
361 throw new ApplicationException(e); | |
362 } catch (IOException e) { | |
363 throw new ApplicationException(e); | |
364 } finally { | |
365 // always close the stream | |
366 if (in != null) try { in.close(); } catch (Exception e) { } | |
367 if (out != null) try { out.close(); } catch (Exception e) { } | |
368 } | |
369 } | |
370 | |
371 private void writeDonatusSupplementsForms(String provider, String language, File inputFile, File outputFile) throws ApplicationException { | |
372 BufferedReader in = null; | |
373 BufferedOutputStream out = null; | |
374 try { | |
375 in = new BufferedReader(new FileReader(inputFile)); | |
376 out = new BufferedOutputStream(new FileOutputStream(outputFile)); | |
377 write("<forms>\n", out); | |
378 String line = null; | |
379 String lemmaName = ""; | |
380 String formName = ""; | |
381 // each line is a form | |
382 while((line = in.readLine()) != null) { | |
383 if (line.length() == 0) | |
384 break; | |
385 String firstChar = line.substring(0, 1); | |
386 String mode = "lemmaAndForm"; | |
387 if (firstChar.equals(",")) | |
388 mode = "form"; | |
389 if (mode.equals("lemmaAndForm")) { | |
390 int quote2 = line.indexOf("\"", 1); | |
391 lemmaName = line.substring(1, quote2); | |
392 int quote3 = line.indexOf("\"", quote2 + 1); | |
393 int quote4 = line.indexOf("\"", quote3 + 1); | |
394 formName = line.substring(quote3 + 1, quote4); | |
395 } else if (mode.equals("form")) { | |
396 int quote2 = line.indexOf("\"", 3); | |
397 formName = line.substring(2, quote2); | |
398 } | |
399 Form form = new Form(); | |
400 form.setProvider(provider); | |
401 form.setLanguage(language); | |
402 form.setFormName(formName); | |
403 form.setLemmaName(lemmaName); | |
404 if (form.isGreek()) | |
405 transcodeFromBetaCode2Unicode(form); | |
406 else if (form.isArabic()) | |
407 form = transcodeFromBuckwalter2Unicode(form); | |
408 form.normalize(); | |
409 if (form.isOk()) { | |
410 Hashtable<String, Form> formLemmas = forms.get(formName); | |
411 if (formLemmas == null) { | |
412 formLemmas = new Hashtable<String, Form>(); | |
413 formLemmas.put(lemmaName, form); | |
414 forms.put(formName, formLemmas); | |
415 write(form, out); | |
416 } else { | |
417 Form formLemma = formLemmas.get(lemmaName); | |
418 if (formLemma == null) { | |
419 formLemmas.put(lemmaName, form); | |
420 write(form, out); | |
421 } | |
422 } | |
423 } | |
424 } | |
425 write("</forms>\n", out); | |
426 } catch (FileNotFoundException e) { | |
427 throw new ApplicationException(e); | |
428 } catch (IOException e) { | |
429 throw new ApplicationException(e); | |
430 } finally { | |
431 // always close the stream | |
432 if (in != null) try { in.close(); } catch (Exception e) { } | |
433 if (out != null) try { out.close(); } catch (Exception e) { } | |
434 } | |
435 } | |
436 | |
437 private void write(Form form, BufferedOutputStream out) throws ApplicationException { | |
438 try { | |
439 String xmlFormStr = form.getXmlString(); | |
440 byte[] bytes = xmlFormStr.getBytes("utf-8"); | |
441 out.write(bytes, 0, bytes.length); | |
442 out.flush(); | |
443 } catch (IOException e) { | |
444 throw new ApplicationException(e); | |
445 } | |
446 } | |
447 | |
448 private void write(String inputString, BufferedOutputStream out) throws ApplicationException { | |
449 try { | |
450 byte[] bytes = inputString.getBytes("utf-8"); | |
451 out.write(bytes, 0, bytes.length); | |
452 out.flush(); | |
453 } catch (IOException e) { | |
454 throw new ApplicationException(e); | |
455 } | |
456 } | |
457 | |
458 private Form transcodeFromBetaCode2Unicode(Form form) throws ApplicationException { | |
459 String formName = form.getFormName(); | |
460 String lemmaName = form.getLemmaName(); | |
461 Transcoder transcoder = Transcoder.getInstance(); | |
462 String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName); | |
463 String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName); | |
464 form.setFormName(encodedUnicodeForm); | |
465 form.setLemmaName(encodedUnicodeLemma); | |
466 return form; | |
467 } | |
468 | |
469 private Form transcodeFromBuckwalter2Unicode(Form form) throws ApplicationException { | |
470 String formName = form.getFormName(); | |
471 String lemmaName = form.getLemmaName(); | |
472 Transcoder transcoder = Transcoder.getInstance(); | |
473 String encodedUnicodeForm = transcoder.transcodeFromBuckwalter2Unicode(formName); | |
474 String encodedUnicodeLemma = transcoder.transcodeFromBuckwalter2Unicode(lemmaName); | |
475 form.setFormName(encodedUnicodeForm); | |
476 form.setLemmaName(encodedUnicodeLemma); | |
477 return form; | |
478 } | |
479 | |
480 private void end() throws ApplicationException { | |
481 } | |
482 | |
483 private void beginOperation() { | |
484 beginOfOperation = new Date(); | |
485 } | |
486 | |
487 private void endOperation() { | |
488 endOfOperation = new Date(); | |
489 } | |
490 | |
491 } |