changeset 2:2b29b0b6db16 default tip

Version mit integrierter Suche über XML-Volltexte
author dwinter
date Wed, 26 Jan 2011 14:41:09 +0100
parents 5c9c31510f0c
children
files src/de/mpiwg/dwinter/fulltext/search/FulltextSearch.java src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java src/de/mpiwg/dwinter/fulltext/search/IFulltextSearch.java src/de/mpiwg/dwinter/fulltext/search/IFulltextSearchDocsLines.java src/de/mpiwg/dwinter/fulltext/search/xmlsearchadapter/XMLSearchServerAdapter.java src/de/mpiwg/dwinter/fulltext/searcher/ILanguageSearcher.java src/de/mpiwg/dwinter/fulltext/searcher/LanguageSearcher.java src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java
diffstat 8 files changed, 490 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/fulltext/search/FulltextSearch.java	Wed Nov 03 12:26:29 2010 +0100
+++ b/src/de/mpiwg/dwinter/fulltext/search/FulltextSearch.java	Wed Jan 26 14:41:09 2011 +0100
@@ -43,7 +43,7 @@
 
 
 
-public class FulltextSearch {
+public class FulltextSearch implements IFulltextSearch {
 
 	protected File index_dir;
 	
@@ -82,14 +82,23 @@
 		languageSearchers.add(new LanguageSearcher("la",new StandardAnalyzer(Version.LUCENE_30),dir));
 	}
 	
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearch#search(org.apache.lucene.search.Query, java.lang.String)
+	 */
 	public Collector search(Query query,String language) throws IOException{
 		return languageSearchers.searchLanguage(query, language);
 	}
 	
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearch#search(org.apache.lucene.search.Query)
+	 */
 	public HashMap<String, Collector> search(Query query) throws IOException{
 		return languageSearchers.searchAllLanguages(query);
 	}
 
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearch#searchMD(java.lang.String, java.lang.String, java.util.ArrayList)
+	 */
 	public HashMap<String, Collector> searchMD(String searchString, String mdString, ArrayList<String> languages) throws ParseException, IOException {
 		if (mdString==null & languages==null)
 			return languageSearchers.parseAndsearchAllLanguages(searchString);
@@ -98,6 +107,9 @@
 		
 	}
 
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearch#search(java.lang.String)
+	 */
 	public HashMap<String, Collector> search(String searchString) throws IOException, ParseException {
 		
 		return languageSearchers.parseAndsearchAllLanguages(searchString);
@@ -109,6 +121,9 @@
 //		return languageSearchers.parseAndsearchAndAnalyseAllLanguages(searchString);
 //	}
 
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearch#searchForMorph(java.lang.String, java.lang.String)
+	 */
 	public ArrayList<String> searchForMorph(String path, String word) throws ParseException, IOException, ParserConfigurationException, SAXException, XPathExpressionException {
 		
 		LanguageSearcher searcher = languageSearchers.getSearcherByLanguage("morph");
--- a/src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java	Wed Nov 03 12:26:29 2010 +0100
+++ b/src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java	Wed Jan 26 14:41:09 2011 +0100
@@ -34,7 +34,7 @@
 import de.mpiwg.dwinter.fulltext.searcher.LanguageSearchers;
 import de.mpiwg.dwinter.fulltext.ticket.TicketWriter;
 
-public class FulltextSearchDocsLines extends FulltextSearch {
+public class FulltextSearchDocsLines extends FulltextSearch implements IFulltextSearchDocsLines {
 
 	protected static Logger logger = Logger.getRootLogger();
 
@@ -71,16 +71,8 @@
 				new StandardAnalyzer(Version.LUCENE_30), dir));
 	}
 
-	/**
-	 * Erzeugt Ergebnisliste im Filesystem fuer die Weiterbenutzung Ÿbers
-	 * Servlet
-	 * 
-	 * @param query
-	 * @param language
-	 * @param ticket
-	 *            ticket unter dem auf die Daten zurŸckgegriffen werden soll.
-	 * @return
-	 * @throws IOException
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines#searchInLinesToDir(org.apache.lucene.search.Query, java.lang.String, java.lang.String)
 	 */
 	public void searchInLinesToDir(Query query, String language, String ticket)
 			throws IOException {
@@ -167,6 +159,9 @@
 	}
 
 	
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines#searchInLinesDoc(java.lang.String, org.apache.lucene.search.Query, java.lang.String)
+	 */
 	public OCRDoc searchInLinesDoc(String textId,Query query, String language) throws IOException{
 		Query textIDQuery = new TermQuery(new Term("textId", textId));
 		BooleanQuery booleanQuery = new BooleanQuery();
@@ -182,6 +177,9 @@
 		
 			
 	}
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines#searchInLines(org.apache.lucene.search.Query, java.lang.String)
+	 */
 	public List<OCRDoc> searchInLines(Query query, String language)
 			throws IOException {
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/search/IFulltextSearch.java	Wed Jan 26 14:41:09 2011 +0100
@@ -0,0 +1,34 @@
+package de.mpiwg.dwinter.fulltext.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPathExpressionException;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Query;
+import org.xml.sax.SAXException;
+
+/** Search entry points of the full-text search; implemented by FulltextSearch. */
+public interface IFulltextSearch {
+
+	/** Searches with a ready-made query in one language. */
+	Collector search(Query query, String language) throws IOException;
+
+	/** Searches with a ready-made query; FulltextSearch runs it on all languages. */
+	HashMap<String, Collector> search(Query query) throws IOException;
+
+	/** Parses and runs searchString; with mdString and languages both null FulltextSearch searches all languages. */
+	HashMap<String, Collector> searchMD(String searchString, String mdString,
+			ArrayList<String> languages) throws ParseException, IOException;
+
+	/** Parses searchString and runs it; FulltextSearch does so on all languages. */
+	HashMap<String, Collector> search(String searchString) throws IOException, ParseException;
+
+	/** Word lookup, done by FulltextSearch with its "morph" searcher; NOTE(review): meaning of path to be confirmed. */
+	ArrayList<String> searchForMorph(String path, String word) throws ParseException,
+			IOException, ParserConfigurationException, SAXException, XPathExpressionException;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/search/IFulltextSearchDocsLines.java	Wed Jan 26 14:41:09 2011 +0100
@@ -0,0 +1,32 @@
+package de.mpiwg.dwinter.fulltext.search;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.lucene.search.Query;
+
+import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;
+
+/** Line based searches; implemented by FulltextSearchDocsLines and XMLSearchServerAdapter. */
+public interface IFulltextSearchDocsLines {
+
+	/**
+	 * Creates the result list in the file system so that it can be used
+	 * further via the servlet.
+	 * 
+	 * @param query the query to run
+	 * @param language language to search in
+	 * @param ticket ticket under which the data can be accessed later
+	 * @throws IOException declared for failures while searching or writing
+	 */
+	void searchInLinesToDir(Query query, String language, String ticket)
+			throws IOException;
+
+	/** Searches one document; FulltextSearchDocsLines adds a term query on the field "textId" for it. */
+	OCRDoc searchInLinesDoc(String textId, Query query, String language)
+			throws IOException;
+
+	/** Runs the query for the language and returns the result as a list of OCRDoc. */
+	List<OCRDoc> searchInLines(Query query, String language)
+			throws IOException;
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/search/xmlsearchadapter/XMLSearchServerAdapter.java	Wed Jan 26 14:41:09 2011 +0100
@@ -0,0 +1,364 @@
+/**
+ * 
+ */
+package de.mpiwg.dwinter.fulltext.search.xmlsearchadapter;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+import org.jdom.output.Format;
+import org.jdom.output.XMLOutputter;
+import org.jdom.xpath.XPath;
+
+import de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines;
+import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;
+import de.mpiwg.dwinter.fulltext.search.utils.OCRLine;
+import de.mpiwg.dwinter.fulltext.ticket.TicketWriter;
+import de.mpiwg.dwinter.lucencetools.documents.FileDocument;
+
+/**
+ * @author dwinter
+ * 
+ */
+public class XMLSearchServerAdapter implements IFulltextSearchDocsLines {
+
+	protected static Logger logger = Logger.getRootLogger(); // log4j root logger
+
+	public static String XMLServerSearchBase = "http://mpdl-test.mpiwg-berlin.mpg.de:30030/mpdl/interface/queryResult.xql?"; // base URL of the hit list query built in searchInLinesToDir
+	//public static String XMLDocSearchBase = "http://mpdl-test.mpiwg-berlin.mpg.de:30030/mpdl/interface/doc-query.xql?";
+	public static String XMLDocSearchBase = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?"; // base URL of the per-document query; NOTE(review): other host than the two URLs around it - confirm
+	public static String XMLServerBase = "http://mpdl-test.mpiwg-berlin.mpg.de:30030/mpdl/interface/"; // not referenced inside this class
+	
+	
+	//http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Bernoulli_1738_AZ870BWE.xml&mode=text&query-type=fulltext&query=quantitas
+	// http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql?document=/echo/la/Bernoulli_1738_AZ870BWE.xml&queryType=fulltext&query=quantitas
+	/**
+	 * Runs the search against the remote XML search server and stores the
+	 * answer below the ticket folder: the hit list ("result"), the hit count
+	 * ("resultInfo") and, for every document of the hit list, one file with
+	 * the server's answer for that document.
+	 * 
+	 * @param query
+	 *            Lucene query; the text of a term on the field "contents" is
+	 *            sent to the server as search word
+	 * @param calledLanguage
+	 *            language, either "lang" or "lang:XML"
+	 * @param ticket
+	 *            ticket under which the results are stored
+	 * @throws IOException
+	 *             if a folder cannot be created, or reading or writing fails
+	 * @see de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines#searchInLinesToDir(Query, String, String)
+	 */
+	@Override
+	public void searchInLinesToDir(Query query, String calledLanguage, String ticket)
+			throws IOException {
+		// The language may be given as "lang:XML" or just "lang"; the folder
+		// name below the ticket always carries the ":XML" suffix.
+		String languageFolderName;
+		String language;
+		String[] langsplitted = calledLanguage.split(":");
+		if (langsplitted.length > 1 && langsplitted[1].equals("XML")) {
+			language = langsplitted[0];
+			languageFolderName = calledLanguage;
+		} else {
+			language = calledLanguage;
+			languageFolderName = calledLanguage + ":XML";
+		}
+
+		TicketWriter tw = new TicketWriter(ticket, query, languageFolderName);
+
+		File languageFile = new File(tw.ticketFile.getAbsolutePath()
+				+ TicketWriter.PATHSEPARATOR + languageFolderName);
+		if (!languageFile.exists()) {
+			logger.debug("Create Languagefolder:"
+					+ languageFile.getCanonicalPath());
+			if (!languageFile.mkdirs())
+				throw new IOException("Cannot create folder " + languageFile);
+		}
+
+		// Search word: the text of a term on "contents" (if the query has
+		// several of them, only one is used).
+		Set<Term> terms = new HashSet<Term>();
+		query.extractTerms(terms);
+		String morphQuery = "";
+		for (Term t : terms) {
+			if (t.field().equals("contents"))
+				morphQuery = t.text();
+		}
+		// The word becomes part of a URL and therefore has to be encoded.
+		String encodedQuery = java.net.URLEncoder.encode(morphQuery, "UTF-8");
+
+		// e.g. docbase=archimedes&docbase=echo&queryType=fulltextMorph&language=la&ftMorphQuery=quantitas&pn=1&output=xml&pageSize=50
+		String queryString = XMLServerSearchBase
+				+ "docbase=archimedes&docbase=echo&queryType=fulltextMorph";
+		queryString += "&language=" + language;
+		queryString += "&ftMorphQuery=" + encodedQuery;
+		queryString += "&pn=1&output=xml&pageSize=500";
+		SAXBuilder parser = new SAXBuilder();
+		Document doc;
+		try {
+			doc = parser.build(queryString);
+		} catch (JDOMException e) {
+			logger.error("Cannot parse the answer to " + queryString, e);
+			return;
+		}
+
+		int counter = writeResults(tw, languageFolderName, ticket, doc);
+		writeResultInfo(tw, doc, counter, languageFolderName);
+		tw.commitTicket();
+
+		List<Element> docElements;
+		try {
+			XPath docsXP = XPath.newInstance("//document");
+			docElements = docsXP.selectNodes(doc);
+		} catch (JDOMException e1) {
+			logger.error("Cannot read the documents of the hit list", e1);
+			return;
+		}
+
+		for (Element e : docElements) {
+			Element textIdElement;
+			try {
+				textIdElement = (Element) XPath.selectSingleNode(e, "uri");
+			} catch (JDOMException e1) {
+				logger.warn("Skip a hit whose uri cannot be read", e1);
+				continue;
+			}
+			if (textIdElement == null)
+				continue; // a hit without uri cannot be fetched or stored
+			String textId = textIdElement.getTextTrim();
+			String docName = textId.replace(TicketWriter.PATHSEPARATORCHAR, ':');
+
+			File docFile = new File(languageFile.getAbsolutePath()
+					+ TicketWriter.PATHSEPARATOR + docName);
+			if (!docFile.exists()) {
+				logger.debug("Create Docfolder:" + docFile.getCanonicalPath());
+				if (!docFile.mkdirs())
+					throw new IOException("Cannot create folder " + docFile);
+			}
+			// e.g. http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Bernoulli_1738_AZ870BWE.xml&mode=text&query-type=fulltext&query=quantitas
+			String docQueryString = XMLDocSearchBase + "document=" + textId;
+			docQueryString += "&mode=text&query-type=fulltext&query=" + encodedQuery;
+
+			Document pageDoc;
+			try {
+				pageDoc = parser.build(docQueryString);
+			} catch (JDOMException e2) {
+				logger.error("Cannot parse the answer to " + docQueryString, e2);
+				return;
+			}
+
+			// TODO: one file per page; at the moment there is only one file
+			// per document, named like the document folder.
+			File pageFile = new File(docFile.getAbsolutePath()
+					+ TicketWriter.PATHSEPARATOR + docName);
+			// Hand the outputter a stream instead of a FileWriter: the file is
+			// then encoded as the outputter's format says, not with the
+			// platform default charset, and it is closed on every path.
+			FileOutputStream pageOut = new FileOutputStream(pageFile);
+			try {
+				new XMLOutputter(Format.getCompactFormat()).output(pageDoc, pageOut);
+			} finally {
+				pageOut.close();
+			}
+		}
+		tw.closeTicket(languageFolderName);
+	}
+
+	/**
+	 * Writes the file "resultInfo" (folder name and hit counts) into the language folder.
+	 * @param tw ticket writer that knows the ticket folder
+	 * @param doc answer of the search server (not evaluated here)
+	 * @param counter number of hits; written as hits and as totalHits
+	 * @param languageFolderName name of the language folder below the ticket folder
+	 * @throws IOException if the file cannot be opened or written
+	 */
+	private void writeResultInfo(TicketWriter tw, Document doc, int counter,
+			String languageFolderName) throws IOException {
+		File resultFile = new File(tw.ticketFile.getAbsolutePath()
+				+ TicketWriter.PATHSEPARATOR + languageFolderName
+				+ TicketWriter.PATHSEPARATOR + "resultInfo");
+		// TODO: is there a difference between hits and totalHits in this case?
+		String ret = "<resultInfo><lang>" + languageFolderName + "</lang><hits>" + counter
+				+ "</hits><totalHits>" + counter + "</totalHits></resultInfo>";
+		OutputStreamWriter rw = new OutputStreamWriter(new FileOutputStream(resultFile), "utf-8");
+		try {
+			rw.write(ret);
+		} finally {
+			rw.close(); // release the file even if writing fails
+		}
+	}
+
+	/**
+	 * Writes the hit list of a server answer into the file "result" in the
+	 * language folder of the ticket: one result element per document with
+	 * its id and its metadata (author, title, place, date). A document that
+	 * is listed several times is written only once.
+	 * 
+	 * @param tw
+	 *            ticket writer that knows the ticket folder
+	 * @param languageFolderName
+	 *            name of the language folder below the ticket folder
+	 * @param ticket
+	 *            ticket id (not used by this method)
+	 * @param doc
+	 *            answer of the search server
+	 * @return number of documents written; -1 if the result file cannot be
+	 *         opened or the answer cannot be evaluated
+	 * @throws IOException
+	 *             if the language folder cannot be created or writing fails
+	 */
+	private int writeResults(TicketWriter tw, String languageFolderName, String ticket,
+			Document doc) throws IOException {
+		File languageFile = new File(tw.ticketFile.getAbsolutePath()
+				+ TicketWriter.PATHSEPARATOR + languageFolderName);
+		// The folder has to exist before the result file can be opened in it.
+		if (!languageFile.exists()) {
+			logger.debug("Create Languagefolder:"
+					+ languageFile.getCanonicalPath());
+			if (!languageFile.mkdirs())
+				throw new IOException("Cannot create folder " + languageFile);
+		}
+
+		File resultFile = new File(languageFile.getAbsolutePath()
+				+ TicketWriter.PATHSEPARATOR + "result");
+		OutputStreamWriter rw;
+		try {
+			rw = new OutputStreamWriter(new FileOutputStream(resultFile), "utf-8");
+		} catch (FileNotFoundException e) {
+			logger.error("Cannot open the result file " + resultFile, e);
+			return -1;
+		}
+
+		// TODO: for some reason a document can occur several times in the
+		// hit list; the ids make sure that each one is written only once.
+		Set<String> idsAlreadyDone = new HashSet<String>();
+		int counter = 0;
+		try {
+			// every document element of the answer is one hit
+			List<?> elements = XPath.newInstance("//document").selectNodes(doc);
+			for (Object element : elements) {
+				Element e = (Element) element;
+				Element uri = (Element) XPath.selectSingleNode(e, "uri");
+				if (uri == null)
+					continue; // no id, nothing to refer to
+
+				String id = uri.getTextTrim();
+				if (!idsAlreadyDone.add(id))
+					continue; // already written
+
+				String md = "<dc:creator>" + childText(e, "author") + "</dc:creator>";
+				md += "<dc:title>" + childText(e, "title") + "</dc:title>";
+				md += "<dc:place>" + childText(e, "place") + "</dc:place>";
+				md += "<dc:date>" + childText(e, "date") + "</dc:date>";
+
+				// The id is escaped like the metadata to keep the file well-formed.
+				String ret = "<result xmlns:dc=\"http://dublincore.org/documents/dcmi-namespace/\">";
+				ret += "<cleanedPath>" + formatXML(id) + "</cleanedPath>";
+				ret += "<textId>" + formatXML(id.replace("/", ":")) + "</textId>";
+				ret += "<textIdCleaned>" + formatXML(id.replace("/", "_"))
+						+ "</textIdCleaned>";
+				ret += "<md>" + md + "</md>";
+				ret += "</result>";
+
+				rw.write(ret);
+				counter++;
+			}
+		} catch (JDOMException e1) {
+			logger.error("Cannot evaluate the answer of the search server", e1);
+			return -1;
+		} finally {
+			// close the file on every path, also after an error
+			rw.close();
+		}
+		return counter;
+	}
+
+	/**
+	 * Returns the XML-escaped, trimmed text of a child element.
+	 * 
+	 * @param parent
+	 *            element to look into
+	 * @param childName
+	 *            XPath (here: the name) of the child
+	 * @return the escaped text, or "" if there is no such child
+	 * @throws JDOMException
+	 *             if the XPath cannot be evaluated
+	 */
+	private String childText(Element parent, String childName) throws JDOMException {
+		Element child = (Element) XPath.selectSingleNode(parent, childName);
+		return child == null ? "" : formatXML(child.getTextTrim());
+	}
+
+	/** Escapes the characters &amp;, &lt; and &gt; so that the string can be used as XML text. */
+	private String formatXML(String string) {
+		// "&" is replaced first so that the entities created below stay intact
+		String ampersandEscaped = string.replace("&", "&amp;");
+		return ampersandEscaped.replace("<", "&lt;").replace(">", "&gt;");
+	}
+
+	/*
+	 * Not implemented for the XML search server: always returns null.
+	 * 
+	 * @see
+	 * de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines#searchInLinesDoc
+	 * (java.lang.String, org.apache.lucene.search.Query, java.lang.String)
+	 */
+	@Override
+	public OCRDoc searchInLinesDoc(String textId, Query query, String language)
+			throws IOException {
+		// TODO implement; until then no search is run and callers get null
+		return null;
+	}
+
+	/*
+	 * Not implemented for the XML search server: always returns null.
+	 * 
+	 * @see
+	 * de.mpiwg.dwinter.fulltext.search.IFulltextSearchDocsLines#searchInLines
+	 * (org.apache.lucene.search.Query, java.lang.String)
+	 */
+	@Override
+	public List<OCRDoc> searchInLines(Query query, String language)
+			throws IOException {
+		// TODO implement; until then no search is run and callers get null, not an empty list
+		return null;
+	}
+
+	/** Manual test run: searches "quantitas" on the field "contents" for "la" under the ticket "121". */
+	public static void main(String[] args) {
+		Query sampleQuery = new TermQuery(new Term("contents", "quantitas"));
+		XMLSearchServerAdapter adapter = new XMLSearchServerAdapter();
+		try {
+			adapter.searchInLinesToDir(sampleQuery, "la", "121");
+		} catch (IOException ioError) {
+			// only report the problem, this is just a manual test run
+			ioError.printStackTrace();
+		}
+	}
+
+	/** Returns the language keys of this adapter: "la:XML" and "it:XML". */
+	public static List<String> getSupportedLanguages() {
+		// fixed-size list on top of an array, exactly as before
+		return Arrays.asList("la:XML", "it:XML");
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/searcher/ILanguageSearcher.java	Wed Jan 26 14:41:09 2011 +0100
@@ -0,0 +1,16 @@
+package de.mpiwg.dwinter.fulltext.searcher;
+
+import java.io.IOException;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.Collector;
+
+/** Parsing search entry points of a searcher; implemented by LanguageSearcher. */
+public interface ILanguageSearcher {
+	/** Parses searchString into a query and searches with it. */
+	Collector parseAndSearch(String searchString) throws ParseException, IOException;
+
+	/** Same, but LanguageSearcher appends " AND dcMetaData:" + mdString before parsing. */
+	Collector parseAndSearch(String searchString, String mdString)
+			throws ParseException, IOException;
+}
\ No newline at end of file
--- a/src/de/mpiwg/dwinter/fulltext/searcher/LanguageSearcher.java	Wed Nov 03 12:26:29 2010 +0100
+++ b/src/de/mpiwg/dwinter/fulltext/searcher/LanguageSearcher.java	Wed Jan 26 14:41:09 2011 +0100
@@ -28,7 +28,7 @@
 
 import de.mpiwg.dwinter.lucencetools.analyzer.LanguageAnalyzer;
 
-public class LanguageSearcher extends LanguageAnalyzer {
+public class LanguageSearcher extends LanguageAnalyzer implements ILanguageSearcher {
 
 	static final int MAX_RESULTS = 10000; // Maximalanzahl Treffer
 	public Searcher searcher=null;
@@ -42,6 +42,9 @@
 		
 	}
 
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.searcher.ILanguageSearcher#parseAndSearch(java.lang.String)
+	 */
 	public Collector parseAndSearch(String searchString) throws ParseException, IOException
 	{
 		Query query= parser.parse(searchString);
@@ -57,6 +60,9 @@
 		
 	}	
 	
+	/* (non-Javadoc)
+	 * @see de.mpiwg.dwinter.fulltext.searcher.ILanguageSearcher#parseAndSearch(java.lang.String, java.lang.String)
+	 */
 	public Collector parseAndSearch(String searchString, String mdString) throws ParseException, IOException {
 		Query query= parser.parse(searchString +" AND dcMetaData:"+mdString);
 		System.out.println("Parse and search:"+query);
--- a/src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java	Wed Nov 03 12:26:29 2010 +0100
+++ b/src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java	Wed Jan 26 14:41:09 2011 +0100
@@ -53,9 +53,9 @@
     
 	private static final String TICKET_PATH = "/tmp/ticketfolder";
 
-	protected static final String PATHSEPARATOR = "/";
-	protected static final char PATHSEPARATORCHAR = '/';
-	protected File ticketFile;
+	public static final String PATHSEPARATOR = "/";
+	public static final char PATHSEPARATORCHAR = '/';
+	public File ticketFile;
 	private Map<String,LanguageWriter> languageFolders = new HashMap<String,LanguageWriter>();
 
 	private String ticket;
@@ -171,7 +171,7 @@
 			lastChangeFw.close();
 			
 			
-		}// TODO Auto-generated method stub
+		}
 		
 	
 	/**
@@ -349,7 +349,7 @@
 		return readFileToString(qf);
 	}
 	
-	private  static String getQueryString(String lang,String ticket) throws FileNotFoundException, IOException {
+	public static String getQueryString(String lang,String ticket) throws FileNotFoundException, IOException {
 		String ticketString = TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang;
 		File qf = new File(ticketString+PATHSEPARATOR+"query");
 		
@@ -492,6 +492,13 @@
 		
 	}
 
+	/** Hands the file TICKET_PATH/ticket/lang/textId/pageFileName to readFileToString and returns its result. */
+	public static String getFileContent(String ticket, String lang, String textId,
+			String pageFileName) throws FileNotFoundException, IOException  {
+		String pagePath = TICKET_PATH + PATHSEPARATOR + ticket + PATHSEPARATOR + lang + PATHSEPARATOR + textId + PATHSEPARATOR + pageFileName;
+		return readFileToString(new File(pagePath));
+	}
+