Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/text/tokenize/Token.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | 4a3641ae14d2 |
children |
comparison
equal
deleted
inserted
replaced
22:6a45a982c333 | 23:e845310098ba |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize; | 1 package de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize; |
2 | 2 |
3 public class Token { | 3 public class Token { |
4 private String content; | 4 private String docId; |
5 private int start; | 5 private String language; |
6 private int end; | 6 private int pageNumber; |
7 private int lineNumber; | |
8 private String elementName; // e.g. "TEI:s" | |
9 private int elementPosition; | |
10 private int elementPagePosition; | |
11 private String xmlId; | |
12 private String xpath; | |
13 private String content; // original text content | |
14 private String contentOrig; // word form | |
15 private String contentReg; // regularized text content | |
16 private String contentNorm; // normalized word form | |
17 private String contentMorph; // lemmas separated by blank | |
18 private int start; // start position | |
19 private int end; // end position | |
7 | 20 |
8 public Token(int start, int end, String content) { | 21 public Token(int start, int end, String content) { |
9 this.start = start; | 22 this.start = start; |
10 this.end = end; | 23 this.end = end; |
11 this.content = content; | 24 this.content = content; |
12 } | 25 this.contentOrig = toWordForm(); |
13 | 26 } |
14 public String getContent() { | 27 |
15 return content; | 28 public Token(String docId, String language, int pageNumber, int lineNumber, int elementPosition, String elementName, String xmlId, String xpath, String contentOrig, String contentReg, String contentNorm, String contentMorph) { |
29 this.docId = docId; | |
30 this.language = language; | |
31 this.pageNumber = pageNumber; | |
32 this.lineNumber = lineNumber; | |
33 this.elementPosition = elementPosition; | |
34 this.elementName = elementName; | |
35 this.xmlId = xmlId; | |
36 this.xpath = xpath; | |
37 this.contentOrig = contentOrig; | |
38 this.contentReg = contentReg; | |
39 this.contentNorm = contentNorm; | |
40 this.contentMorph = contentMorph; | |
16 } | 41 } |
17 | 42 |
18 public int getStart() { | 43 public int getStart() { |
19 return start; | 44 return start; |
20 } | 45 } |
21 | 46 |
22 public int getEnd() { | 47 public int getEnd() { |
23 return end; | 48 return end; |
49 } | |
50 | |
51 public String toWordForm() { | |
52 if (content != null) | |
53 return content.toLowerCase(); | |
54 else | |
55 return null; | |
24 } | 56 } |
25 | 57 |
26 public String toString() { | 58 public String toString() { |
27 String retStr = ""; | 59 String retStr = ""; |
28 if (content != null) | 60 if (contentOrig != null) |
29 retStr += content; | 61 retStr += contentOrig; |
30 retStr = retStr + "(" + start + "," + end + ")"; | 62 retStr = retStr + "(" + start + "," + end + ")"; |
31 return retStr; | 63 return retStr; |
32 } | 64 } |
33 | 65 |
66 public String toXmlString() { | |
67 StringBuilder retStr = new StringBuilder(); | |
68 retStr.append("<token>"); | |
69 if (docId != null) | |
70 retStr.append("<docId>" + docId + "</docId>"); | |
71 if (language != null) | |
72 retStr.append("<language>" + language + "</language>"); | |
73 retStr.append("<pageNumber>" + pageNumber + "</pageNumber>"); | |
74 retStr.append("<elementPosition>" + elementPosition + "</elementPosition>"); | |
75 retStr.append("<elementPagePosition>" + elementPagePosition + "</elementPagePosition>"); | |
76 if (elementName != null) | |
77 retStr.append("<elementName>" + elementName + "</elementName>"); | |
78 if (contentOrig != null) | |
79 retStr.append("<contentOrig>" + contentOrig + "</contentOrig>"); | |
80 retStr.append("</token>"); | |
81 return retStr.toString(); | |
82 } | |
83 | |
84 public String getContent() { | |
85 return content; | |
86 } | |
87 | |
88 public void setContent(String content) { | |
89 this.content = content; | |
90 this.contentOrig = toWordForm(); | |
91 } | |
92 | |
93 public String getContentOrig() { | |
94 return contentOrig; | |
95 } | |
96 | |
97 public void setContentOrig(String contentOrig) { | |
98 this.contentOrig = contentOrig; | |
99 } | |
100 | |
101 public String getContentReg() { | |
102 return contentReg; | |
103 } | |
104 | |
105 public void setContentReg(String contentReg) { | |
106 this.contentReg = contentReg; | |
107 } | |
108 | |
109 public String getContentNorm() { | |
110 return contentNorm; | |
111 } | |
112 | |
113 public void setContentNorm(String contentNorm) { | |
114 this.contentNorm = contentNorm; | |
115 } | |
116 | |
117 public String getContentMorph() { | |
118 return contentMorph; | |
119 } | |
120 | |
121 public void setContentMorph(String contentMorph) { | |
122 this.contentMorph = contentMorph; | |
123 } | |
124 | |
125 public String getDocId() { | |
126 return docId; | |
127 } | |
128 | |
129 public void setDocId(String docId) { | |
130 this.docId = docId; | |
131 } | |
132 | |
133 public String getLanguage() { | |
134 return language; | |
135 } | |
136 | |
137 public void setLanguage(String language) { | |
138 this.language = language; | |
139 } | |
140 | |
141 public int getPageNumber() { | |
142 return pageNumber; | |
143 } | |
144 | |
145 public void setPageNumber(int pageNumber) { | |
146 this.pageNumber = pageNumber; | |
147 } | |
148 | |
149 public int getLineNumber() { | |
150 return lineNumber; | |
151 } | |
152 | |
153 public void setLineNumber(int lineNumber) { | |
154 this.lineNumber = lineNumber; | |
155 } | |
156 | |
157 public int getPosition() { | |
158 return elementPosition; | |
159 } | |
160 | |
161 public void setElementPosition(int elementPosition) { | |
162 this.elementPosition = elementPosition; | |
163 } | |
164 | |
165 public int getPagePosition() { | |
166 return elementPagePosition; | |
167 } | |
168 | |
169 public void setElementPagePosition(int elementPagePosition) { | |
170 this.elementPagePosition = elementPagePosition; | |
171 } | |
172 | |
173 public String getElementName() { | |
174 return elementName; | |
175 } | |
176 | |
177 public void setElementName(String elementName) { | |
178 this.elementName = elementName; | |
179 } | |
180 | |
181 public String getXmlId() { | |
182 return xmlId; | |
183 } | |
184 | |
185 public void setXmlId(String xmlId) { | |
186 this.xmlId = xmlId; | |
187 } | |
188 | |
189 public String getXpath() { | |
190 return xpath; | |
191 } | |
192 | |
193 public void setXpath(String xpath) { | |
194 this.xpath = xpath; | |
195 } | |
196 | |
197 public void setStart(int start) { | |
198 this.start = start; | |
199 } | |
200 | |
201 public void setEnd(int end) { | |
202 this.end = end; | |
203 } | |
34 } | 204 } |