Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java @ 12:fba5577e49d9
diverse Fehlerbehebungen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 19 Apr 2011 16:51:26 +0200 |
parents | 408254cf2f1d |
children |
comparison
equal
deleted
inserted
replaced
11:d6f528ad5d96 | 12:fba5577e49d9 |
---|---|
108 instance.lexiqueConvert("lexique", "fr", inputFileNameFrench, outputFileNameFrench); | 108 instance.lexiqueConvert("lexique", "fr", inputFileNameFrench, outputFileNameFrench); |
109 String inputFileNameDonatusFrenchSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-fr-forms.csv"; | 109 String inputFileNameDonatusFrenchSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-fr-forms.csv"; |
110 String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml"; | 110 String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml"; |
111 instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup); | 111 instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup); |
112 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | 112 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); |
113 */ | |
114 // Italian | 113 // Italian |
115 String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash"; | 114 String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash"; |
116 String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml"; | 115 String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml"; |
117 instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian); | 116 instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian); |
118 /* | |
119 String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv"; | 117 String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv"; |
120 String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml"; | 118 String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml"; |
121 instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup); | 119 instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup); |
122 */ | 120 */ |
123 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); | 121 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); |
459 String formName = form.getFormName(); | 457 String formName = form.getFormName(); |
460 String lemmaName = form.getLemmaName(); | 458 String lemmaName = form.getLemmaName(); |
461 Transcoder transcoder = Transcoder.getInstance(); | 459 Transcoder transcoder = Transcoder.getInstance(); |
462 String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName); | 460 String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName); |
463 String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName); | 461 String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName); |
462 // replace a trailing "small letter sigma" (σ) with the "small letter final sigma" (ς) | |
463 if (encodedUnicodeForm != null && encodedUnicodeForm.endsWith("σ")) { | |
464 int length = encodedUnicodeForm.length(); | |
465 encodedUnicodeForm = encodedUnicodeForm.substring(0, length - 1) + "ς"; | |
466 } | |
467 if (encodedUnicodeLemma != null && encodedUnicodeLemma.endsWith("σ")) { | |
468 int length = encodedUnicodeLemma.length(); | |
469 encodedUnicodeLemma = encodedUnicodeLemma.substring(0, length - 1) + "ς"; | |
470 } | |
464 form.setFormName(encodedUnicodeForm); | 471 form.setFormName(encodedUnicodeForm); |
465 form.setLemmaName(encodedUnicodeLemma); | 472 form.setLemmaName(encodedUnicodeLemma); |
466 return form; | 473 return form; |
467 } | 474 } |
468 | 475 |