19
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 import java.util.Hashtable;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 import org.xml.sax.*;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 public class DBMorphWriterContentHandler implements ContentHandler {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 private DBMorphHandler dbMorphHandler;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 private Element currentElement;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 private Form form;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 private Lemma lemma;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 private Hashtable<String, Form> forms;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 public DBMorphWriterContentHandler(DBMorphHandler dbMorphHandler) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 this.dbMorphHandler = dbMorphHandler;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 public void startDocument() throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 forms = new Hashtable<String, Form>();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26 public void endDocument() throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 forms = null;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 // TODO setPos etc. ersetzen durch addPos etc.
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 public void characters(char[] c, int start, int length) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 if (currentElement != null) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33 String elemName = currentElement.name;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 if (form != null) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 char[] cCopy = new char[length];
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 System.arraycopy(c, start, cCopy, 0, length);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37 String charactersStr = String.valueOf(cCopy);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38 if (charactersStr != null && ! (charactersStr.trim().equals(""))) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 if (elemName.equals("provider")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 form.addProvider(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 lemma.addProvider(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42 } else if (elemName.equals("language")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43 form.addLanguage(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 lemma.addLanguage(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 } else if (elemName.equals("form-name")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 form.addFormName(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 } else if (elemName.equals("lemma-name")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48 form.addLemmaName(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 lemma.addLemmaName(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50 } else if (elemName.equals("pos")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 form.addPos(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 } else if (elemName.equals("tense")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 form.addTense(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 } else if (elemName.equals("voice")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 form.addVoice(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 } else if (elemName.equals("casus")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57 form.addCasus(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 } else if (elemName.equals("number")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 form.addNumber(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60 } else if (elemName.equals("mood")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61 form.addMood(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62 } else if (elemName.equals("person")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 form.addPerson(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 } else if (elemName.equals("gender")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 form.addGender(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 } else if (elemName.equals("definite")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67 form.addDefinite(charactersStr);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74 public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77 public void processingInstruction(String target, String data) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80 public void setDocumentLocator(org.xml.sax.Locator arg1) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83 public void endPrefixMapping(String prefix) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86 public void skippedEntity(String name) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90 currentElement = new Element(name, "");
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 if (localName.equals("form")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92 form = new Form();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 lemma = new Lemma();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 public void endElement(String uri, String localName, String name) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 currentElement = null;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99 if (localName.equals("form")) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100 String keyStr = form.getFormName();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 forms.put(keyStr, form);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102 write(form, lemma);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103 form = null;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104 lemma = null;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108 public void startPrefixMapping(String prefix, String uri) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
111 private void write(Form form, Lemma lemma) throws SAXException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
112 try {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
113 dbMorphHandler.writeFormLemma(form, lemma);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
114 dbMorphHandler.writeLemmaForm(lemma, form);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
115 } catch (ApplicationException e) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
116 throw new SAXException(e);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
117 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
118 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
119
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
120 private class Element {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
121 String name;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
122 String value;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
123
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
124 Element(String name) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
125 this.name = name;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
126 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
127
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
128 Element(String name, String value) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
129 this.name = name;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
130 this.value = value;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
131 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
132 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
133 }
|