comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java @ 12:fba5577e49d9

diverse Fehlerbehebungen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 19 Apr 2011 16:51:26 +0200
parents 408254cf2f1d
children
comparison
equal deleted inserted replaced
11:d6f528ad5d96 12:fba5577e49d9
108 instance.lexiqueConvert("lexique", "fr", inputFileNameFrench, outputFileNameFrench); 108 instance.lexiqueConvert("lexique", "fr", inputFileNameFrench, outputFileNameFrench);
109 String inputFileNameDonatusFrenchSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-fr-forms.csv"; 109 String inputFileNameDonatusFrenchSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-fr-forms.csv";
110 String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml"; 110 String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml";
111 instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup); 111 instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup);
112 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); 112 instance.forms = new Hashtable<String, Hashtable<String, Form>>();
113 */
114 // Italian 113 // Italian
115 String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash"; 114 String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash";
116 String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml"; 115 String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml";
117 instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian); 116 instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian);
118 /*
119 String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv"; 117 String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv";
120 String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml"; 118 String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml";
121 instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup); 119 instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup);
122 */ 120 */
123 instance.forms = new Hashtable<String, Hashtable<String, Form>>(); 121 instance.forms = new Hashtable<String, Hashtable<String, Form>>();
459 String formName = form.getFormName(); 457 String formName = form.getFormName();
460 String lemmaName = form.getLemmaName(); 458 String lemmaName = form.getLemmaName();
461 Transcoder transcoder = Transcoder.getInstance(); 459 Transcoder transcoder = Transcoder.getInstance();
462 String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName); 460 String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName);
463 String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName); 461 String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName);
462 // Greek writes sigma as "ς" (small letter final sigma, U+03C2) at the end of a word, so replace a trailing "σ" (small letter sigma, U+03C3) accordingly
463 if (encodedUnicodeForm != null && encodedUnicodeForm.endsWith("σ")) {
464 int length = encodedUnicodeForm.length();
465 encodedUnicodeForm = encodedUnicodeForm.substring(0, length - 1) + "ς";
466 }
467 if (encodedUnicodeLemma != null && encodedUnicodeLemma.endsWith("σ")) {
468 int length = encodedUnicodeLemma.length();
469 encodedUnicodeLemma = encodedUnicodeLemma.substring(0, length - 1) + "ς";
470 }
464 form.setFormName(encodedUnicodeForm); 471 form.setFormName(encodedUnicodeForm);
465 form.setLemmaName(encodedUnicodeLemma); 472 form.setLemmaName(encodedUnicodeLemma);
466 return form; 473 return form;
467 } 474 }
468 475