annotate src/de/mpiwg/dwinter/fulltextSearchServer/SearchLines.java @ 0:db87c1b7eb6d

initial
author dwinter
date Wed, 03 Nov 2010 12:18:46 +0100
parents
children 83e9a828e794
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
db87c1b7eb6d initial
dwinter
parents:
diff changeset
1 package de.mpiwg.dwinter.fulltextSearchServer;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
2
db87c1b7eb6d initial
dwinter
parents:
diff changeset
3 import java.io.ByteArrayInputStream;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
4 import java.io.File;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
5 import java.io.IOException;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
6 import java.io.InputStream;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
7 import java.io.OutputStream;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
8 import java.io.Reader;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
9 import java.io.Writer;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
10 import java.net.URLDecoder;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
11 import java.nio.channels.ReadableByteChannel;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
12 import java.nio.channels.WritableByteChannel;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
13 import java.util.ArrayList;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
14 import java.util.Properties;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
15 import java.util.regex.Matcher;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
16 import java.util.regex.Pattern;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
17
db87c1b7eb6d initial
dwinter
parents:
diff changeset
18 import javax.xml.parsers.DocumentBuilder;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
19 import javax.xml.parsers.DocumentBuilderFactory;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
20 import javax.xml.parsers.ParserConfigurationException;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
21 import javax.xml.transform.TransformerFactoryConfigurationError;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
22 import javax.xml.xpath.XPath;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
23 import javax.xml.xpath.XPathConstants;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
24 import javax.xml.xpath.XPathExpression;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
25 import javax.xml.xpath.XPathExpressionException;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
26 import javax.xml.xpath.XPathFactory;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
27
db87c1b7eb6d initial
dwinter
parents:
diff changeset
28 import org.apache.log4j.Logger;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
29 import org.apache.lucene.analysis.Analyzer;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
30 import org.apache.lucene.index.Term;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
31 import org.apache.lucene.queryParser.ParseException;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
32 import org.apache.lucene.queryParser.QueryParser;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
33 import org.apache.lucene.search.Query;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
34 import org.apache.lucene.search.TermQuery;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
35 import org.apache.lucene.util.Version;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
36 import org.restlet.data.Form;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
37 import org.restlet.data.MediaType;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
38 import org.restlet.data.Parameter;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
39 import org.restlet.data.Status;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
40 import org.restlet.representation.Representation;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
41 import org.restlet.representation.StringRepresentation;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
42 import org.restlet.resource.Get;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
43 import org.restlet.resource.Options;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
44 import org.restlet.resource.ServerResource;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
45 import org.w3c.dom.Document;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
46 import org.w3c.dom.Node;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
47 import org.w3c.dom.NodeList;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
48 import org.xml.sax.SAXException;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
49
db87c1b7eb6d initial
dwinter
parents:
diff changeset
50 import de.mpiwg.dwinter.fulltext.search.FulltextSearchDocsLines;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
51 import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
52 import de.mpiwg.dwinter.fulltext.search.utils.OCRLine;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
53 import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
54 import de.mpiwg.dwinter.fulltext.ticket.TicketWriter;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
55 import de.mpiwg.dwinter.fulltextSearchServer.Utils.ConfigurationManager;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
56 import de.mpiwg.dwinter.fulltextSearchServer.Utils.DigilibTools;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
57 import de.mpiwg.dwinter.fulltextSearchServer.Utils.XMLTools;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
58
db87c1b7eb6d initial
dwinter
parents:
diff changeset
59 public class SearchLines extends ServerResource {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
60
db87c1b7eb6d initial
dwinter
parents:
diff changeset
61 /**
db87c1b7eb6d initial
dwinter
parents:
diff changeset
62 * Erlaubt cross scripting bei Aufruf aus Javascript
db87c1b7eb6d initial
dwinter
parents:
diff changeset
63 *
db87c1b7eb6d initial
dwinter
parents:
diff changeset
64 * @param entity
db87c1b7eb6d initial
dwinter
parents:
diff changeset
65 */
db87c1b7eb6d initial
dwinter
parents:
diff changeset
66
db87c1b7eb6d initial
dwinter
parents:
diff changeset
67 private Logger logger = Logger.getRootLogger();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
68 private String cleanedPath;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
69
db87c1b7eb6d initial
dwinter
parents:
diff changeset
70 @Options
db87c1b7eb6d initial
dwinter
parents:
diff changeset
71 public void doOptions(Representation entity) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
72 Form responseHeaders = (Form) getResponse().getAttributes().get(
db87c1b7eb6d initial
dwinter
parents:
diff changeset
73 "org.restlet.http.headers");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
74 if (responseHeaders == null) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
75 responseHeaders = new Form();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
76 getResponse().getAttributes().put("org.restlet.http.headers",
db87c1b7eb6d initial
dwinter
parents:
diff changeset
77 responseHeaders);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
78 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
79 responseHeaders.add("Access-Control-Allow-Origin", "*");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
80 responseHeaders.add("Access-Control-Allow-Methods", "POST,OPTIONS,GET");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
81 responseHeaders.add("Access-Control-Allow-Headers", "Content-Type");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
82 responseHeaders.add("Access-Control-Allow-Credentials", "false");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
83 responseHeaders.add("Access-Control-Max-Age", "60");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
84 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
85
db87c1b7eb6d initial
dwinter
parents:
diff changeset
86 // @Get("xml")
db87c1b7eb6d initial
dwinter
parents:
diff changeset
87 public Representation getXML() throws IOException, ParseException {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
88
db87c1b7eb6d initial
dwinter
parents:
diff changeset
89 return new StringRepresentation(getHits(), MediaType.TEXT_XML);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
90
db87c1b7eb6d initial
dwinter
parents:
diff changeset
91 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
92
db87c1b7eb6d initial
dwinter
parents:
diff changeset
93 @Get("html")
db87c1b7eb6d initial
dwinter
parents:
diff changeset
94 public Representation getHTML()
db87c1b7eb6d initial
dwinter
parents:
diff changeset
95 throws TransformerFactoryConfigurationError, IOException,
db87c1b7eb6d initial
dwinter
parents:
diff changeset
96 ParseException, XPathExpressionException {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
97 // response header fuer cross-site.scripting
db87c1b7eb6d initial
dwinter
parents:
diff changeset
98 Form responseHeaders = (Form) getResponse().getAttributes().get(
db87c1b7eb6d initial
dwinter
parents:
diff changeset
99 "org.restlet.http.headers");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
100 if (responseHeaders == null) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
101 responseHeaders = new Form();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
102 getResponse().getAttributes().put("org.restlet.http.headers",
db87c1b7eb6d initial
dwinter
parents:
diff changeset
103 responseHeaders);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
104 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
105 responseHeaders.add("Access-Control-Allow-Origin", "*");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
106
db87c1b7eb6d initial
dwinter
parents:
diff changeset
107 // String txt =
db87c1b7eb6d initial
dwinter
parents:
diff changeset
108 // XMLTools.transformToHTML(getHits(),"/de/mpiwg/dwinter/fulltextSearchServer/xsl/processRestPathOfDocumentToHTML.xsl");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
109 String txt = getHits();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
110 if (getStatus().isError())
db87c1b7eb6d initial
dwinter
parents:
diff changeset
111 return new StringRepresentation(txt, MediaType.TEXT_HTML);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
112 String ret = "";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
113 // ret+="<pageFileName>"+pageFileName+"</pageFileName>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
114 //ArrayList<Double[]> points = new ArrayList<Double[]>();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
115
db87c1b7eb6d initial
dwinter
parents:
diff changeset
116 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
117 dbf.setNamespaceAware(true);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
118 dbf.setValidating(false);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
119 DocumentBuilder db;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
120 try {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
121 db = dbf.newDocumentBuilder();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
122 } catch (ParserConfigurationException e) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
123 // TODO Auto-generated catch block
db87c1b7eb6d initial
dwinter
parents:
diff changeset
124 e.printStackTrace();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
125 return null;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
126 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
127 // db.setEntityResolver(new MyResolver());
db87c1b7eb6d initial
dwinter
parents:
diff changeset
128
db87c1b7eb6d initial
dwinter
parents:
diff changeset
129 Document doc;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
130 try {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
131 // stream = new StringInputStream(xml,"utf-8");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
132
db87c1b7eb6d initial
dwinter
parents:
diff changeset
133 ByteArrayInputStream stream = new ByteArrayInputStream(
db87c1b7eb6d initial
dwinter
parents:
diff changeset
134 txt.getBytes("utf-8"));
db87c1b7eb6d initial
dwinter
parents:
diff changeset
135 doc = db.parse(stream);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
136
db87c1b7eb6d initial
dwinter
parents:
diff changeset
137 } catch (SAXException e) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
138 // TODO Auto-generated catch block
db87c1b7eb6d initial
dwinter
parents:
diff changeset
139 e.printStackTrace();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
140 return null;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
141 } catch (IOException e) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
142 // TODO Auto-generated catch block
db87c1b7eb6d initial
dwinter
parents:
diff changeset
143 e.printStackTrace();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
144 return null;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
145 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
146
db87c1b7eb6d initial
dwinter
parents:
diff changeset
147 XPath xpath = XPathFactory.newInstance().newXPath();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
148 // XPath Query for showing all nodes value
db87c1b7eb6d initial
dwinter
parents:
diff changeset
149 XPathExpression expr = xpath.compile("//page");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
150 XPathExpression line = xpath.compile("line");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
151 XPathExpression name = xpath.compile("name");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
152
db87c1b7eb6d initial
dwinter
parents:
diff changeset
153 Object result = expr.evaluate(doc, XPathConstants.NODESET);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
154 NodeList pages = (NodeList) result;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
155 for (int i = 0; i < pages.getLength(); i++) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
156 NodeList names = (NodeList) name.evaluate(pages.item(i),
db87c1b7eb6d initial
dwinter
parents:
diff changeset
157 XPathConstants.NODESET);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
158 String pathName = "";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
159
db87c1b7eb6d initial
dwinter
parents:
diff changeset
160 if (names.getLength() == 1) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
161 Node obj = names.item(0);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
162 pathName = obj.getTextContent();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
163 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
164 NodeList lines = (NodeList) line.evaluate(pages.item(i),
db87c1b7eb6d initial
dwinter
parents:
diff changeset
165 XPathConstants.NODESET);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
166 ArrayList<Double[]> points = new ArrayList<Double[]>();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
167 for (int l = 0; l < lines.getLength(); l++) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
168 Double[] point = DigilibTools.calculatePoint(lines.item(l)
db87c1b7eb6d initial
dwinter
parents:
diff changeset
169 .getTextContent());
db87c1b7eb6d initial
dwinter
parents:
diff changeset
170 points.add(point);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
171 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
172
db87c1b7eb6d initial
dwinter
parents:
diff changeset
173 // Pattern linePattern =
db87c1b7eb6d initial
dwinter
parents:
diff changeset
174 // Pattern.compile("<line>(.*?)</line>",Pattern.MULTILINE);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
175 // Matcher m = linePattern.matcher(txt);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
176
db87c1b7eb6d initial
dwinter
parents:
diff changeset
177 // while(m.find()){
db87c1b7eb6d initial
dwinter
parents:
diff changeset
178 // Double[] point = DigilibTools.calculatePoint(m.group(1));
db87c1b7eb6d initial
dwinter
parents:
diff changeset
179 // points.add(point);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
180 // }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
181
db87c1b7eb6d initial
dwinter
parents:
diff changeset
182 String textId = (String) getRequest().getAttributes().get("textId");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
183
db87c1b7eb6d initial
dwinter
parents:
diff changeset
184 String url = String.format(DigilibTools.DIGIVIEWBASICSTRING,
db87c1b7eb6d initial
dwinter
parents:
diff changeset
185 DigilibTools.generateImagePath(textId, pathName),
db87c1b7eb6d initial
dwinter
parents:
diff changeset
186 DigilibTools.generateMarksFromPoints(points));
db87c1b7eb6d initial
dwinter
parents:
diff changeset
187
db87c1b7eb6d initial
dwinter
parents:
diff changeset
188 ret += String.format(
db87c1b7eb6d initial
dwinter
parents:
diff changeset
189 "<div class=\"hitsOnPage\"><a href=\"%s\">%s</a></div>",
db87c1b7eb6d initial
dwinter
parents:
diff changeset
190 url, pathName);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
191 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
192 ret += "";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
193
db87c1b7eb6d initial
dwinter
parents:
diff changeset
194 // return ret;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
195
db87c1b7eb6d initial
dwinter
parents:
diff changeset
196 return new StringRepresentation(ret, MediaType.TEXT_HTML);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
197 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
198
db87c1b7eb6d initial
dwinter
parents:
diff changeset
199 protected String getHits() throws IOException, ParseException {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
200 String textId = (String) getRequest().getAttributes().get("textId");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
201 String queryString = (String) getRequest().getAttributes().get(
db87c1b7eb6d initial
dwinter
parents:
diff changeset
202 "queryString");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
203 String lang = (String) getRequest().getAttributes().get("lang");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
204
db87c1b7eb6d initial
dwinter
parents:
diff changeset
205 Properties defaultProperties = ConfigurationManager.getConfig();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
206
db87c1b7eb6d initial
dwinter
parents:
diff changeset
207 File lineDir = new File(defaultProperties.getProperty("lineIndex"));
db87c1b7eb6d initial
dwinter
parents:
diff changeset
208 File docIndex = new File(defaultProperties.getProperty("docIndex"));
db87c1b7eb6d initial
dwinter
parents:
diff changeset
209
db87c1b7eb6d initial
dwinter
parents:
diff changeset
210
db87c1b7eb6d initial
dwinter
parents:
diff changeset
211 Boolean parse=true; // im regelfall spll der Querystring noch geparsed werden
db87c1b7eb6d initial
dwinter
parents:
diff changeset
212
db87c1b7eb6d initial
dwinter
parents:
diff changeset
213 Form form = getRequest().getResourceRef().getQueryAsForm(); // moeglicher parameter "parse" if "false" dann kein parsing des query strings
db87c1b7eb6d initial
dwinter
parents:
diff changeset
214 for (Parameter parameter : form) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
215 String name =parameter.getName();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
216 if (name.equals("parse")){
db87c1b7eb6d initial
dwinter
parents:
diff changeset
217 String parserQuestion = parameter.getValue();
db87c1b7eb6d initial
dwinter
parents:
diff changeset
218 if (parserQuestion.equals("false"))
db87c1b7eb6d initial
dwinter
parents:
diff changeset
219 parse=false;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
220 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
221 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
222 FulltextSearchDocsLines searcher = new FulltextSearchDocsLines(
db87c1b7eb6d initial
dwinter
parents:
diff changeset
223 docIndex, lineDir);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
224
db87c1b7eb6d initial
dwinter
parents:
diff changeset
225 LanguageSearcher ls = searcher.languageSearchers
db87c1b7eb6d initial
dwinter
parents:
diff changeset
226 .getSearcherByLanguage(lang);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
227 if (ls == null) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
228 setStatus(Status.CLIENT_ERROR_NOT_FOUND);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
229 return "<error>Language Not Found</error>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
230 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
231 Analyzer analyzer = searcher.languageSearchers
db87c1b7eb6d initial
dwinter
parents:
diff changeset
232 .getSearcherByLanguage(lang).analyzer;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
233
db87c1b7eb6d initial
dwinter
parents:
diff changeset
234 QueryParser parser = new QueryParser(Version.LUCENE_30, "contents",
db87c1b7eb6d initial
dwinter
parents:
diff changeset
235 analyzer);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
236 queryString = URLDecoder.decode(queryString, "utf-8");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
237 logger.debug(queryString);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
238 Query query;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
239 if (parse){
db87c1b7eb6d initial
dwinter
parents:
diff changeset
240 query = parser.parse(queryString);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
241 } else {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
242 String[] splitted = queryString.split(":");
db87c1b7eb6d initial
dwinter
parents:
diff changeset
243 String qs;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
244 if (splitted.length>1)
db87c1b7eb6d initial
dwinter
parents:
diff changeset
245 qs = splitted[1];
db87c1b7eb6d initial
dwinter
parents:
diff changeset
246 else
db87c1b7eb6d initial
dwinter
parents:
diff changeset
247 qs = splitted[0];
db87c1b7eb6d initial
dwinter
parents:
diff changeset
248
db87c1b7eb6d initial
dwinter
parents:
diff changeset
249 Term term = new Term("contents",qs);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
250 query = new TermQuery(term);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
251 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
252
db87c1b7eb6d initial
dwinter
parents:
diff changeset
253 textId = textId.replace(":", "/"); // esetze pfad trenner TODO statt
db87c1b7eb6d initial
dwinter
parents:
diff changeset
254 // pfadtrenner ersetzen besser
db87c1b7eb6d initial
dwinter
parents:
diff changeset
255 // urlencode auch in den anderen
db87c1b7eb6d initial
dwinter
parents:
diff changeset
256 // klassen
db87c1b7eb6d initial
dwinter
parents:
diff changeset
257
db87c1b7eb6d initial
dwinter
parents:
diff changeset
258 OCRDoc result = searcher.searchInLinesDoc(textId, query, lang);
db87c1b7eb6d initial
dwinter
parents:
diff changeset
259
db87c1b7eb6d initial
dwinter
parents:
diff changeset
260 cleanedPath = result.document.get("cleanedPath") + "</cleanedPath>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
261
db87c1b7eb6d initial
dwinter
parents:
diff changeset
262 String ret = "<xml>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
263 ret += "<docId>" + textId + "</docId>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
264 ret += "<cleanedPath>" + result.document.get("cleanedPath")
db87c1b7eb6d initial
dwinter
parents:
diff changeset
265 + "</cleanedPath>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
266
db87c1b7eb6d initial
dwinter
parents:
diff changeset
267 if (result.linesInPage != null) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
268
db87c1b7eb6d initial
dwinter
parents:
diff changeset
269 for (String page : result.linesInPage.keySet()) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
270 ret += "<page><name>" + page + "</name>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
271 for (OCRLine line : result.linesInPage.get(page)) {
db87c1b7eb6d initial
dwinter
parents:
diff changeset
272 ret += "<line>" + line.toString() + "</line>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
273 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
274 ret += "</page>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
275 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
276 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
277 ret += "</xml>";
db87c1b7eb6d initial
dwinter
parents:
diff changeset
278 return ret;
db87c1b7eb6d initial
dwinter
parents:
diff changeset
279 }
db87c1b7eb6d initial
dwinter
parents:
diff changeset
280 }