Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/transform/HighlightContentHandler.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
22:6a45a982c333 | 23:e845310098ba |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.cms.transform; | |
2 | |
3 import java.util.ArrayList; | |
4 | |
5 import org.xml.sax.*; | |
6 | |
7 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; | |
8 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
9 import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; | |
10 | |
11 public class HighlightContentHandler implements ContentHandler { | |
12 private String xmlnsString = ""; | |
13 private String highlightElemName; | |
14 private int highlightElemPos = 1; | |
15 private int currentHighlightElemPos = 0; | |
16 private boolean highlightElemMode = false; | |
17 private int highlightElemModeOpenTags = 0; | |
18 private String highlightQueryType = "orig"; // orig, reg, norm or morph | |
19 private String highlightQuery; // complex Lucene query | |
20 private String highlightQueryForms; // highlight terms separated by a blank | |
21 private boolean highlightHitMode = false; | |
22 private int highlightHitModeOpenTags = 0; | |
23 private boolean firstPageBreakReachedMode = false; // in a page fragment: if a page break element is surrounded by an element (e.g. "s") then this element should not increment the currentHighlightElemPos | |
24 private boolean firstPageBreakReached = true; | |
25 private StringBuilder result = new StringBuilder(); | |
26 | |
27 public HighlightContentHandler() throws ApplicationException { | |
28 } | |
29 | |
30 public HighlightContentHandler(String highlightElemName, int highlightElemPos) throws ApplicationException { | |
31 this.highlightElemName = highlightElemName; | |
32 this.highlightElemPos = highlightElemPos; | |
33 } | |
34 | |
35 public HighlightContentHandler(String highlightElemName, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { | |
36 this.highlightElemName = highlightElemName; | |
37 this.highlightElemPos = highlightElemPos; | |
38 this.highlightQueryType = highlightQueryType; | |
39 this.highlightQuery = highlightQuery; | |
40 if (highlightQuery != null) { | |
41 IndexHandler indexHandler = IndexHandler.getInstance(); | |
42 ArrayList<String> queryTerms = indexHandler.fetchTerms(highlightQuery, language); // all query terms in query (also morphological terms) | |
43 highlightQueryForms = toString(queryTerms); | |
44 } | |
45 } | |
46 | |
47 public void setFirstPageBreakReachedMode(boolean firstPageBreakReachedMode) { | |
48 this.firstPageBreakReachedMode = firstPageBreakReachedMode; | |
49 if (firstPageBreakReachedMode) | |
50 this.firstPageBreakReached = false; // is first set to false and later if a page break is found (by startElement) it is set to true | |
51 } | |
52 | |
53 public StringBuilder getResult() { | |
54 return result; | |
55 } | |
56 | |
57 public void startDocument() throws SAXException { | |
58 } | |
59 | |
60 public void endDocument() throws SAXException { | |
61 } | |
62 | |
63 public void characters(char[] c, int start, int length) throws SAXException { | |
64 char[] cCopy = new char[length]; | |
65 System.arraycopy(c, start, cCopy, 0, length); | |
66 String charactersStr = String.valueOf(cCopy); | |
67 if (charactersStr != null && ! charactersStr.equals("")) { | |
68 charactersStr = StringUtils.deresolveXmlEntities(charactersStr); | |
69 write(charactersStr); | |
70 } | |
71 } | |
72 | |
73 public void ignorableWhitespace(char[] c, int start, int length) throws SAXException { | |
74 } | |
75 | |
76 public void processingInstruction(String target, String data) throws SAXException { | |
77 } | |
78 | |
79 public void setDocumentLocator(Locator locator) { | |
80 } | |
81 | |
82 public void startPrefixMapping(String prefix, String uri) throws SAXException { | |
83 xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" "; | |
84 if (prefix != null && prefix.equals("")) | |
85 xmlnsString = "xmlns" + "=\"" + uri + "\" "; | |
86 } | |
87 | |
88 public void endPrefixMapping(String prefix) throws SAXException { | |
89 } | |
90 | |
91 public void skippedEntity(String name) throws SAXException { | |
92 } | |
93 | |
94 public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException { | |
95 int attrSize = attrs.getLength(); | |
96 String attrString = ""; | |
97 for (int i=0; i<attrSize; i++) { | |
98 String attrQName = attrs.getQName(i); | |
99 String attrValue = attrs.getValue(i); | |
100 attrValue = StringUtils.forXML(attrValue); | |
101 attrString = attrString + " " + attrQName + "=\"" + attrValue + "\""; | |
102 } | |
103 if (attrString != null && ! attrString.isEmpty()) { | |
104 attrString = attrString.trim(); | |
105 } | |
106 if (xmlnsString != null && ! xmlnsString.isEmpty()) { | |
107 xmlnsString = xmlnsString.trim(); | |
108 } | |
109 if (localName.equals("pb")) | |
110 firstPageBreakReached = true; | |
111 // start highlight element at position | |
112 if (highlightElemName != null && highlightElemName.equals(localName) && firstPageBreakReached) { | |
113 currentHighlightElemPos++; | |
114 if (currentHighlightElemPos == highlightElemPos && highlightElemModeOpenTags == 0) { | |
115 highlightElemMode = true; | |
116 write("<hi type=\"elem\">"); | |
117 } | |
118 } | |
119 if (highlightElemMode) { | |
120 highlightElemModeOpenTags++; | |
121 } | |
122 // start highlight query | |
123 if (highlightQuery != null && localName.equals("w")) { | |
124 boolean matched = false; | |
125 String attrQName = "form"; | |
126 if (highlightQueryType.equals("orig")) | |
127 attrQName = "form"; | |
128 else if (highlightQueryType.equals("reg")) | |
129 attrQName = "formRegularized"; | |
130 else if (highlightQueryType.equals("norm")) | |
131 attrQName = "formNormalized"; | |
132 else if (highlightQueryType.equals("morph")) | |
133 attrQName = "lemmas"; | |
134 String attrValue = getAttrValue(attrs, attrQName); | |
135 if (highlightQueryType.equals("reg") && attrValue == null) | |
136 attrValue = getAttrValue(attrs, "form"); // if no regularized form exist it takes the form | |
137 if (attrValue != null) { | |
138 String[] forms = highlightQueryForms.split(" "); | |
139 for (int i=0; i<forms.length; i++) { | |
140 if (! matched) { | |
141 String form = forms[i]; | |
142 if (form.endsWith("*")) { // TODO support middle wildcard queries: bla*bla bla?bla | |
143 form = form.replace("*", ""); | |
144 matched = attrValue.startsWith(form); | |
145 } else { | |
146 matched = attrValue.equals(form); | |
147 } | |
148 } | |
149 } | |
150 } | |
151 if ((highlightElemName == null && matched && highlightHitModeOpenTags == 0) || (highlightElemName != null && highlightElemMode && matched && highlightHitModeOpenTags == 0)) { | |
152 highlightHitMode = true; | |
153 write("<hi type=\"hit\">"); | |
154 } | |
155 } | |
156 if (highlightHitMode) { | |
157 highlightHitModeOpenTags++; | |
158 } | |
159 write("<" + name); | |
160 if (xmlnsString != null && ! xmlnsString.isEmpty()) | |
161 write(" " + xmlnsString); | |
162 if (attrString != null && ! attrString.isEmpty()) | |
163 write(" " + attrString); | |
164 write(">"); | |
165 xmlnsString = ""; | |
166 } | |
167 | |
168 public void endElement(String uri, String localName, String name) throws SAXException { | |
169 write("</" + name + ">"); | |
170 // end highlight element at position | |
171 if (highlightElemMode) { | |
172 if (highlightElemModeOpenTags == 1) { | |
173 highlightElemMode = false; | |
174 write("</hi>"); | |
175 } | |
176 highlightElemModeOpenTags--; | |
177 } | |
178 // end highlight query | |
179 if (highlightHitMode) { | |
180 if (highlightHitModeOpenTags == 1) { | |
181 highlightHitMode = false; | |
182 write("</hi>"); | |
183 } | |
184 highlightHitModeOpenTags--; | |
185 } | |
186 } | |
187 | |
188 private String toString(ArrayList<String> queryForms) { | |
189 String queryFormsStr = ""; | |
190 for (int i=0; i<queryForms.size(); i++) { | |
191 String form = queryForms.get(i); | |
192 queryFormsStr = queryFormsStr + form + " "; | |
193 } | |
194 if (queryForms == null || queryForms.size() == 0) | |
195 return null; | |
196 else | |
197 return queryFormsStr.substring(0, queryFormsStr.length() -1); | |
198 } | |
199 | |
200 private void write(String outStr) throws SAXException { | |
201 result.append(outStr); | |
202 } | |
203 | |
204 private String getAttrValue(Attributes attrs, String attrQName) { | |
205 String retValue = null; | |
206 int attrSize = attrs.getLength(); | |
207 for (int i=0; i<attrSize; i++) { | |
208 String attrQNameTmp = attrs.getQName(i); | |
209 String attrValue = attrs.getValue(i); | |
210 if (attrQNameTmp.equals(attrQName)) | |
211 return attrValue; | |
212 } | |
213 return retValue; | |
214 } | |
215 } |