changeset 0:72a015318a6d

CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an. https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16
author dwinter
date Wed, 03 Nov 2010 12:26:20 +0100
parents
children 5c9c31510f0c
files src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java src/de/mpiwg/dwinter/fulltext/ticket/LanguageWriter.java src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java
diffstat 3 files changed, 904 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java	Wed Nov 03 12:26:20 2010 +0100
@@ -0,0 +1,272 @@
+package de.mpiwg.dwinter.fulltext.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.Version;
+
+import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;
+import de.mpiwg.dwinter.fulltext.search.utils.OCRLine;
+import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher;
+import de.mpiwg.dwinter.fulltext.searcher.LanguageSearchers;
+import de.mpiwg.dwinter.fulltext.ticket.TicketWriter;
+
+public class FulltextSearchDocsLines extends FulltextSearch {
+
+	protected static Logger logger = Logger.getRootLogger();
+
+	private static final int MAX_LINES = 10000;
+
+	protected File line_index_dir; // Index mit dem Zeilenindex
+
+	public LanguageSearchers languageSearchersLines = new LanguageSearchers();
+
+	public FulltextSearchDocsLines(File index_dir, File line_index_dir)
+			throws CorruptIndexException, LockObtainFailedException,
+			IOException {
+		// index_dir is handed to the FulltextSearch superclass constructor.
+		super(index_dir);
+		this.line_index_dir = line_index_dir;
+		init_language_searchers_lines(line_index_dir);
+		// NOTE(review): the call above goes to an overridable (protected) method.
+	}
+
+	/** Adds one LanguageSearcher per supported language code to languageSearchersLines. */
+	protected void init_language_searchers_lines(File dir)
+			throws CorruptIndexException, LockObtainFailedException,
+			IOException {
+		final Version luceneVersion = Version.LUCENE_30;
+		final LanguageSearchers target = languageSearchersLines;
+
+		// Only German and French get a language-specific analyzer.
+		target.add(new LanguageSearcher("de", new GermanAnalyzer(luceneVersion), dir));
+		target.add(new LanguageSearcher("en", new StandardAnalyzer(luceneVersion), dir));
+		target.add(new LanguageSearcher("fr", new FrenchAnalyzer(luceneVersion), dir));
+		// The remaining codes each get a fresh StandardAnalyzer, in this order.
+		for (String code : new String[] { "all", "morph", "la" }) {
+			target.add(new LanguageSearcher(code, new StandardAnalyzer(luceneVersion), dir));
+		}
+	}
+
+	/**
+	 * Writes the search result to the file system (through a TicketWriter)
+	 * so that it can be picked up later, e.g. by the servlet.
+	 * 
+	 * @param query    query run on the document index and, per hit, on the line index
+	 * @param language language code used to select both searchers
+	 * @param ticket
+	 *            ticket under which the data can be retrieved later
+	 * @throws IOException propagated from searching or from writing the ticket
+	 *             files
+	 */
+	public void searchInLinesToDir(Query query, String language, String ticket)
+			throws IOException {
+
+		// first step search docs
+		logger.debug("Start searching docs");
+		TopScoreDocCollector col = (TopScoreDocCollector) search(query,
+				language);
+		TopDocs docs = col.topDocs();
+		ScoreDoc[] scoreDocs = docs.scoreDocs;
+		// ArrayList<OCRDoc> ocrDocs = new ArrayList<OCRDoc>();
+
+		TicketWriter tw = new TicketWriter(ticket, query, language);
+
+		LanguageSearcher searcher = languageSearchers
+				.getSearcherByLanguage(language);
+		logger.debug("Start writing docs");
+		tw.writeResultsForLanguageSearch(language, docs, searcher.reader);
+		tw.commitTicket();
+		logger.debug("Wrote docs");
+		LanguageSearcher lineSearcher = languageSearchersLines
+				.getSearcherByLanguage(language);
+
+		Set<String> textIds = new HashSet<String>();
+
+		for (ScoreDoc doc : scoreDocs) {
+			Document d = searcher.reader.document(doc.doc);
+			String textID = d.get("textId");
+			logger.debug("Start:" + textID);
+
+			// check whether this text was searched already. TODO: why does the
+			// same textId sometimes appear several times in the hit list?
+			if (!textIds.contains(textID)) {
+				textIds.add(textID);
+
+				Query textIDQuery = new TermQuery(new Term("textId", textID));
+				//Query[] queries = new Query[] { query, textIDQuery };
+				//Query lineQuery = query.combine(queries);
+
+				BooleanQuery booleanQuery = new BooleanQuery();
+				booleanQuery.add(textIDQuery, BooleanClause.Occur.MUST);
+				booleanQuery.add(query, BooleanClause.Occur.MUST);
+				// now search the lines (at most MAX_LINES per text)
+				TopScoreDocCollector lineCol = TopScoreDocCollector.create(
+						MAX_LINES, false);
+				lineSearcher.searcher.search(booleanQuery, lineCol);
+				logger.debug("Searched:" + textID);
+				OCRDoc ocrDoc = new OCRDoc();
+				ocrDoc.docId = doc.doc;
+				ocrDoc.document = d;
+				ocrDoc.textId = d.get("textId");
+
+				Map<String, ArrayList<OCRLine>> ocrPages = new HashMap<String, ArrayList<OCRLine>>();
+
+				for (ScoreDoc line : lineCol.topDocs().scoreDocs) {
+					// collect all lines, grouped by page name
+					OCRLine ocrLine = new OCRLine();
+					Document lineD = lineSearcher.reader.document(line.doc);
+					ocrLine.pageDimension = lineD.get("pageDimension");
+					ocrLine.bbox = lineD.get("bbox");
+					ocrLine.lineNumber = lineD.get("lineNumber");
+
+					String pageNumber = getPageName(lineD.get("cleanedPath"));
+
+					if (!ocrPages.containsKey(pageNumber)) {
+						ocrPages.put(pageNumber, new ArrayList<OCRLine>());
+					}
+
+					ArrayList<OCRLine> page = ocrPages.get(pageNumber);
+					page.add(ocrLine);
+
+				}
+				logger.debug("collected:" + textID);
+				ocrDoc.linesInPage = ocrPages;
+				tw.writeDoc(language, ocrDoc);
+				tw.commitTicket();
+				logger.debug("written:" + textID);
+			} else {
+				logger.debug("already done:" + textID);
+			}
+		}
+		tw.closeTicket(language);
+		logger.debug("everything done!");
+	}
+
+	
+	/**
+	 * Searches the lines of a single text: the query is AND-combined with a
+	 * term query on "textId" and handed to searchInLines.
+	 *
+	 * @return the first OCRDoc found, or an empty OCRDoc if there is no hit
+	 */
+	public OCRDoc searchInLinesDoc(String textId,Query query, String language) throws IOException{
+		BooleanQuery restricted = new BooleanQuery();
+		restricted.add(new TermQuery(new Term("textId", textId)),
+				BooleanClause.Occur.MUST);
+		restricted.add(query, BooleanClause.Occur.MUST);
+
+		List<OCRDoc> hits = searchInLines(restricted, language);
+		return hits.isEmpty() ? new OCRDoc() : hits.get(0);
+	}
+	/**
+	 * Searches the document index and then, for every distinct textId among
+	 * the hits, the line index; matching lines are grouped by page name.
+	 *
+	 * @param query    also AND-combined with each textId for the line search
+	 * @param language language code selecting the document and line searchers
+	 * @return one OCRDoc per distinct textId, in hit order
+	 * @throws IOException if searching or reading a stored document fails
+	 */
+	public List<OCRDoc> searchInLines(Query query, String language)
+			throws IOException {
+		// first step search docs
+		logger.debug("Start searching docs.");
+		TopScoreDocCollector col = (TopScoreDocCollector) search(query,
+				language);
+		ScoreDoc[] scoreDocs = col.topDocs().scoreDocs;
+		List<OCRDoc> ocrDocs = new ArrayList<OCRDoc>();
+
+		LanguageSearcher searcher = languageSearchers
+				.getSearcherByLanguage(language);
+		LanguageSearcher lineSearcher = languageSearchersLines
+				.getSearcherByLanguage(language);
+
+		logger.debug("found docs.");
+		Set<String> textIds = new HashSet<String>();
+		for (ScoreDoc doc : scoreDocs) {
+			Document d = searcher.reader.document(doc.doc);
+			String textID = d.get("textId");
+			// A text can occur several times in the hit list; handle it once.
+			if (!textIds.add(textID)) {
+				logger.debug("already done:" + textID);
+				continue;
+			}
+
+			BooleanQuery booleanQuery = new BooleanQuery();
+			booleanQuery.add(new TermQuery(new Term("textId", textID)),
+					BooleanClause.Occur.MUST);
+			booleanQuery.add(query, BooleanClause.Occur.MUST);
+			// now search the lines of this text
+			TopScoreDocCollector lineCol = TopScoreDocCollector.create(
+					MAX_LINES, false);
+			lineSearcher.searcher.search(booleanQuery, lineCol);
+			logger.debug("Searched:" + textID);
+
+			OCRDoc ocrDoc = new OCRDoc();
+			ocrDoc.docId = doc.doc;
+			ocrDoc.document = d;
+			// Set textId as searchInLinesToDir does; LanguageWriter.writeDoc
+			// dereferences it, so it must not stay unset.
+			ocrDoc.textId = textID;
+
+			Map<String, ArrayList<OCRLine>> ocrPages = new HashMap<String, ArrayList<OCRLine>>();
+			for (ScoreDoc line : lineCol.topDocs().scoreDocs) {
+				OCRLine ocrLine = new OCRLine();
+				Document lineD = lineSearcher.reader.document(line.doc);
+				ocrLine.pageDimension = lineD.get("pageDimension");
+				ocrLine.bbox = lineD.get("bbox");
+				ocrLine.lineNumber = lineD.get("lineNumber");
+				String pageNumber = getPageName(lineD.get("cleanedPath"));
+				logger.debug("collect:" + pageNumber);
+				ArrayList<OCRLine> page = ocrPages.get(pageNumber);
+				if (page == null) {
+					page = new ArrayList<OCRLine>();
+					ocrPages.put(pageNumber, page);
+				}
+				page.add(ocrLine);
+			}
+			logger.debug("collected:" + textID);
+			ocrDoc.linesInPage = ocrPages;
+			ocrDocs.add(ocrDoc);
+		}
+		return ocrDocs;
+	}
+
+	/**
+	 * Returns the file name part of a path; it is used as the page name.
+	 * 
+	 * @param path
+	 *            path whose last name element is wanted
+	 * @return the last name element of the path
+	 */
+	private String getPageName(String path) {
+		// java.io.File does the separator handling; a null path still
+		// raises the NullPointerException of the File constructor.
+		return new File(path).getName();
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/ticket/LanguageWriter.java	Wed Nov 03 12:26:20 2010 +0100
@@ -0,0 +1,130 @@
+package de.mpiwg.dwinter.fulltext.ticket;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopScoreDocCollector;
+
+import sun.security.krb5.internal.PAEncTSEnc;
+
+import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;
+import de.mpiwg.dwinter.fulltext.search.utils.OCRLine;
+import de.mpiwg.dwinter.lucencetools.documents.FileDocument;
+
+public class LanguageWriter extends TicketWriter{
+
+	private File languageFile;
+	private String language;
+
+	/** Opens (creating it if necessary) the language folder inside the ticket folder. */
+	public LanguageWriter(String ticket, String language) throws IOException{
+		super(ticket);
+
+		languageFile = new File(ticketFile.getAbsolutePath()+PATHSEPARATOR+language);
+		if(!languageFile.exists()){
+			logger.debug("Create Languagefolder:"+languageFile.getCanonicalPath());
+			// mkdirs() is also false when a concurrent writer created the folder first
+			if(!languageFile.mkdirs() && !languageFile.isDirectory())
+				throw new IOException("Cannot create language folder "+languageFile);
+		}
+		this.language=language;
+	}
+
+	/** Writes the hits of a search to the file "result" of the language folder;
+	 * every textId is written only once.
+	 * @param docs hits to write
+	 * @param reader index reader used to load the hit documents
+	 * @return number of hits actually written
+	 * @throws CorruptIndexException
+	 * @throws IOException
+	 */
+	public Integer writeResults(TopDocs docs, IndexReader reader) throws CorruptIndexException, IOException {
+		// The folder has to exist before the result file can be opened, so the
+		// check comes first (opening the stream first would fail on a missing folder).
+		if(!languageFile.exists()){
+			logger.debug("Create Languagefolder:"+languageFile.getCanonicalPath());
+			if(!languageFile.mkdirs() && !languageFile.isDirectory())
+				throw new IOException("Cannot create language folder "+languageFile);
+		}
+		File resultFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"result");
+		OutputStreamWriter rw = new OutputStreamWriter(new FileOutputStream(resultFile),"utf-8");
+		// TODO: for unknown reasons a document can occur several times in the hits
+		Set<String> idsAlreadyDone = new HashSet<String>();
+		int counter = 0;
+		try {
+			for (ScoreDoc sd:docs.scoreDocs)
+			{
+				Document d = reader.document(sd.doc);
+				// add() is false for a textId that was already written
+				if (idsAlreadyDone.add(d.get("textId"))){
+					rw.write(FileDocument.toXML(d)+"\n");
+					counter ++;
+				}
+			}
+		} finally {
+			rw.close(); // also runs when reading a document fails
+		}
+		return counter;
+	}
+
+	
+	/** Writes the file "resultInfo" of the language folder: language,
+	 * number of written hits (counter) and total number of hits (docs.totalHits).
+	 */
+	public void writeResultInfo(TopDocs docs, Integer counter) throws IOException {
+		File resultFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"resultInfo");
+		String ret = "<resultInfo>"+"<lang>"+language+"</lang>"+"<hits>"+counter+"</hits>"
+				+"<totalHits>"+docs.totalHits+"</totalHits>"+"</resultInfo>";
+		OutputStreamWriter rw = new OutputStreamWriter(new FileOutputStream(resultFile),"utf-8");
+		try {
+			rw.write(ret);
+		} finally {
+			rw.close(); // close even if the write fails
+		}
+	}
+	
+
+	/** Writes one folder per document and in it one UTF-8 file per page (one line per
+	 * OCRLine); path separators in the textId and in page names become ':'. */
+	public void writeDoc(OCRDoc ocrDoc) throws IOException {
+		File docFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+ocrDoc.textId.replace(PATHSEPARATORCHAR, ':'));
+		if(!docFile.exists()){
+			logger.debug("Create Docfolder:"+docFile.getCanonicalPath());
+			if(!docFile.mkdirs() && !docFile.isDirectory())
+				throw new IOException("Cannot create document folder "+docFile);
+		}
+		for (String page:ocrDoc.linesInPage.keySet()){
+			File pageFile = new File(docFile.getAbsolutePath()+PATHSEPARATOR+page.replace(PATHSEPARATORCHAR, ':'));
+			OutputStreamWriter pageFileWriter = new OutputStreamWriter(new FileOutputStream(pageFile),"utf-8");
+			try {
+				for (OCRLine line: ocrDoc.linesInPage.get(page)){
+					pageFileWriter.write(line.toString()+"\n");
+				}
+			} finally {
+				pageFileWriter.close();
+			}
+		}
+	}
+
+	/** Stores query.toString() in the file "query" of the language folder (UTF-8). */
+	public void saveQuery(Query query) throws IOException {
+		File qf = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"query");
+		// UTF-8 instead of FileWriter's platform charset: the file is read back as UTF-8
+		OutputStreamWriter fw = new OutputStreamWriter(new FileOutputStream(qf),"utf-8");
+		try { fw.write(query.toString()); } finally { fw.close(); }
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java	Wed Nov 03 12:26:20 2010 +0100
@@ -0,0 +1,502 @@
+package de.mpiwg.dwinter.fulltext.ticket;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStreamWriter;
+import java.nio.CharBuffer;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+import org.jdom.output.XMLOutputter;
+import org.jdom.xpath.XPath;
+
+import com.sun.org.apache.xpath.internal.operations.Bool;
+
+
+
+import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;
+
+/**
+ * @author dwinter
+ *
+ * Schreibt die Ergebnisse einer Suche in ein Ticket
+ */
+public class TicketWriter {
+
+    protected static Logger logger = Logger.getRootLogger();
+    
+	private static final String TICKET_PATH = "/tmp/ticketfolder";
+
+	protected static final String PATHSEPARATOR = "/";
+	protected static final char PATHSEPARATORCHAR = '/';
+	protected File ticketFile;
+	private Map<String,LanguageWriter> languageFolders = new HashMap<String,LanguageWriter>();
+
+	private String ticket;
+	private Boolean ticketOpen = false;
+	private HashMap<String,Boolean> ticketOpenLangs = new HashMap<String,Boolean>();
+	private File lastChangeFile;
+
+
+	/** Creates a TicketWriter for a ticket.
+	 * The ticket folder is created below TICKET_PATH if necessary and the
+	 * "lastChange" file is written via commitTicket().
+	 * @param ticket name of the ticket; used as folder name
+	 * @throws IOException if the folder or the lastChange file cannot be written
+	 */
+	public TicketWriter(String ticket) throws IOException {
+		this.ticket = ticket;
+		ticketOpen = true;
+		ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket);
+		if(!ticketFile.exists()){
+			logger.debug("Create Ticketfolder:"+ticketFile.getCanonicalPath());
+			// mkdirs() is also false when another writer of the same ticket
+			// created the folder in the meantime; that is not an error.
+			if(!ticketFile.mkdirs() && !ticketFile.isDirectory())
+				throw new IOException("Cannot create ticket folder "+ticketFile);
+		}
+
+		lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange");
+		// NOTE(review): commitTicket() is public and overridable.
+		commitTicket();
+	}
+
+	public TicketWriter(String ticket, Query query) throws IOException {
+		this(ticket);
+		saveQuery(query);
+		// Result: the ticket folder exists, lastChange is written (by the
+		// one-argument constructor) and the query is stored via saveQuery.
+	}
+	
+	/** Like TicketWriter(ticket, query); additionally marks every given language as open. */
+	public TicketWriter(String ticket, Query query,ArrayList<String> languages) throws IOException {
+		this(ticket);
+		saveQuery(query);
+		final int count = languages.size();
+		for (int i = 0; i < count; i++)
+			ticketOpenLangs.put(languages.get(i), Boolean.TRUE);
+	}
+
+	
+	/** Stores the query in the ticket folder and in the folder of the given language. */
+	public TicketWriter(String ticket, Query query, String language) throws IOException {
+		this(ticket);
+		saveQuery(query);
+		// The LanguageWriter constructor creates the language folder if it is missing.
+		new LanguageWriter(ticket, language).saveQuery(query);
+	}
+
+	/** Stores query.toString() in the file "query" of the ticket folder (UTF-8). */
+	public void saveQuery(Query query) throws IOException {
+		File qf = new File(ticketFile+PATHSEPARATOR+"query");
+		// UTF-8 instead of FileWriter's platform charset: readFileToString decodes UTF-8
+		OutputStreamWriter fw = new OutputStreamWriter(new FileOutputStream(qf),"utf-8");
+		try { fw.write(query.toString()); } finally { fw.close(); }
+	}
+
+	/** Saves the search results of one language in the ticket.
+	 * @param language language code; selects the language folder
+	 * @param docs hits of the search
+	 * @param reader index reader used to load the hit documents
+	 * @throws IOException 
+	 */
+	public void writeResultsForLanguageSearch(String language, TopDocs docs, IndexReader reader) throws IOException {
+		final LanguageWriter languageWriter = new LanguageWriter(ticket, language);
+		// The number of hits actually written goes into the result info.
+		final Integer written = languageWriter.writeResults(docs, reader);
+		languageWriter.writeResultInfo(docs, written);
+	}
+
+	/** Announces that the ticket has changed: writes the current time
+	 * (format yyyyMMddHHmmss) into the lastChange file.
+	 * @throws IOException 
+	 */ 
+	public void commitTicket() throws IOException {
+		DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
+		String stamp = dateFormat.format(new java.util.Date());
+
+		FileWriter lastChangeFw = new FileWriter(lastChangeFile);
+		try {
+			lastChangeFw.write(stamp);
+		} finally {
+			lastChangeFw.close(); // do not leak the handle if the write fails
+		}
+	}
+
+	/** Writes one result document into the ticket.
+	 * @param language language code; selects the language folder
+	 * @param ocrDoc document with its hit lines
+	 * @throws IOException 
+	 */
+	public void writeDoc(String language, OCRDoc ocrDoc) throws IOException {
+		// Delegates to a LanguageWriter created for this call.
+		final LanguageWriter target = new LanguageWriter(ticket, language);
+		target.writeDoc(ocrDoc);
+	}
+
+	/** Closes the ticket: the lastChange file then contains "closed".
+	 * @throws IOException 
+	 */
+	public void closeTicket() throws IOException {
+		ticketOpen=false;
+		FileWriter lastChangeFw = new FileWriter(lastChangeFile);
+		try {
+			lastChangeFw.write("closed");
+		} finally {
+			lastChangeFw.close(); // do not leak the handle if the write fails
+		}
+	}
+		
+	
+	/**
+	 * Closes the ticket for one language and, once no language is open
+	 * any more, the whole ticket.
+	 * @param language language whose part of the ticket is closed
+	 * @throws IOException 
+	 */
+	public void closeTicket(String language) throws IOException {
+		// Marker file named like the lastChange file plus "_" and the language.
+		File marker = new File(lastChangeFile.getAbsolutePath()+"_"+language);
+		FileWriter markerWriter = new FileWriter(marker);
+		markerWriter.write("closed");
+		markerWriter.close();
+		ticketOpenLangs.put(language, false);
+
+		// Look for a language that is still open.
+		boolean anyOpen = false;
+		for (Boolean open: ticketOpenLangs.values()){
+			if (open) anyOpen = true;
+		}
+
+		if (!anyOpen){
+			closeTicket();
+		}
+	}
+	
+	
+
+
+	/** Returns a random, non-negative ticket number for which no folder exists yet. */
+	static public String getNewTicket(){
+		Random rd = new Random();
+		String tnr;
+		File ticketFile;
+		do {
+			// nextInt(bound) is never negative, unlike a bare nextInt() or Math.abs(Integer.MIN_VALUE)
+			tnr = String.valueOf(rd.nextInt(Integer.MAX_VALUE));
+			ticketFile = new File(TICKET_PATH+PATHSEPARATOR+tnr);
+		} while (ticketFile.exists());
+		return tnr;
+	}
+
+	/**
+	 * Builds the XML description of a ticket: number, content of the
+	 * lastChange file, the query and one lang element per sub folder.
+	 *
+	 * @param ticket ticket number
+	 * @return the XML string, or null if the ticket folder, its lastChange
+	 *         file or its query file does not exist
+	 */
+	public static String getXML(String ticket) throws FileNotFoundException, IOException {
+		File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket);
+		File lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange");
+		File queryFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"query");
+		if (!ticketFile.exists() || !lastChangeFile.exists() || !queryFile.exists()){
+			return null;
+		}
+
+		StringBuilder ret = new StringBuilder("<ticket><number>").append(ticket).append("</number>");
+		ret.append("<lastChanged>").append(readFileToString(lastChangeFile)).append("</lastChanged>");
+		ret.append("<query>").append(readFileToString(queryFile)).append("</query>");
+
+		// every directory in the ticket folder is a language dependent result set
+		File[] entries = ticketFile.listFiles();
+		if (entries != null){ // null if the folder vanished or is not a directory
+			for (File file: entries){
+				if(file.isDirectory()){
+					ret.append("<lang>").append(file.getName()).append("</lang>");
+				}
+			}
+		}
+		return ret.append("</ticket>").toString();
+	}
+	
+
+	
+
+	
+	
+
+	/**
+	 * Returns the result of the ticket for the given language.
+	 *
+	 * @param ticket ticket number
+	 * @param lang language code
+	 * @return the result set as XML, or null if the ticket or its result
+	 *         file for the language does not exist
+	 * @throws FileNotFoundException
+	 * @throws IOException
+	 */
+	public static String getResultXML(String ticket, String lang) throws FileNotFoundException, IOException {
+		File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket);
+		File resultFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"result");
+		// Test for existence before touching the query file; otherwise an unknown
+		// ticket ends in a FileNotFoundException instead of the null result.
+		if (!ticketFile.exists() || !resultFile.exists()){
+			return null;
+		}
+
+		// The query is put into an attribute, so its markup characters have to be escaped.
+		String queryString = getQueryString(lang,ticket)
+				.replace("&", "&amp;").replace("<", "&lt;").replace("\"", "&quot;");
+		return "<resultSet lang=\""+lang+"\"  queryString=\""+queryString+"\"><number>"+ticket+"</number>"
+				+"<results>"+readFileToString(resultFile)+"</results>"
+				+"</resultSet>";
+	}
+	
+	
+	/**
+	 * Returns the results of the given language from the ticket, restricted
+	 * to the records from..to (both inclusive).
+	 * Note: the first record has the number 0.
+	 * @param ticket ticket number
+	 * @param lang language code
+	 * @param from index of the first record; negative values are treated as 0
+	 * @param to index of the last record; limited to the last existing record
+	 * @return the selected results as XML, or null if there is no result
+	 *         for the ticket and language
+	 * @throws IOException 
+	 * @throws FileNotFoundException 
+	 * @throws JDOMException 
+	 */
+	
+	public static String getResultXML(String ticket, String lang,
+			Integer from, Integer to) throws FileNotFoundException, IOException, JDOMException {
+		String ret = getResultXML(ticket, lang);
+		if (ret == null) {
+			return null; // no result set; ret.getBytes() below would fail on null
+		}
+		// The query is put into an attribute, so its markup characters have to be escaped.
+		String queryString = getQueryString(lang,ticket)
+				.replace("&", "&amp;").replace("<", "&lt;").replace("\"", "&quot;");
+		StringBuilder retresult = new StringBuilder("<resultSet lang=\""+lang+"\"  queryString=\""+queryString+"\"><number>"+ticket+"</number><results>");
+
+		SAXBuilder builder = new SAXBuilder();
+		InputStream stream = new ByteArrayInputStream(ret.getBytes("utf-8"));
+		Document doc = builder.build(stream);
+		XPath xpath = XPath.newInstance("//result");
+		List<?> resultsElements = xpath.selectNodes(doc);
+		XMLOutputter outputter = new XMLOutputter();
+		int first = Math.max(0, from); // a negative index would be out of bounds
+		int last = Math.min(to, resultsElements.size()-1);
+		for(int i = first; i <= last; i++){
+			retresult.append(outputter.outputString((Element) resultsElements.get(i)));
+		}
+		return retresult.append("</results></resultSet>").toString();
+	}
+	
+	
+	/**
+	 * Returns the content of the resultInfo file of a ticket and language.
+	 * @return the file content, or null if the ticket folder or the
+	 *         resultInfo file does not exist
+	 */
+	public static String getResultInfoXML(String ticket, String lang) throws IOException {
+		File ticketFolder = new File(TICKET_PATH+PATHSEPARATOR+ticket);
+		if (!ticketFolder.exists()){
+			return null;
+		}
+		File infoFile = new File(ticketFolder.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"resultInfo");
+		if (!infoFile.exists()){
+			return null;
+		}
+		return readFileToString(infoFile);
+	}
+	
+		
+		
+	
+	/** Reads the query file stored directly in the folder of the ticket. */
+	private  static String getQueryString(String ticket) throws FileNotFoundException, IOException {
+		final String queryPath = TICKET_PATH+PATHSEPARATOR+ticket
+				+PATHSEPARATOR+"query";
+		return readFileToString(new File(queryPath));
+	}
+	
+	/** Reads the query file stored in the language folder of the ticket. */
+	private  static String getQueryString(String lang,String ticket) throws FileNotFoundException, IOException {
+		final String queryPath = TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang
+				+PATHSEPARATOR+"query";
+		return readFileToString(new File(queryPath));
+	}
+
+	/**
+	 * Lists the page files stored for a text in the ticket.
+	 *
+	 * @return XML with the number of pages and one page element (with an
+	 *         xlink:href) per file, or null if there is no folder for the text
+	 */
+	public static String getPagesWithHits(String ticket, String lang,
+			String textId) {
+		File textIdFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId);
+		// listFiles() is null if the folder is missing or is not a directory;
+		// it is called once so that the count and the list always agree.
+		File[] pageFiles = textIdFile.exists() ? textIdFile.listFiles() : null;
+		if (pageFiles == null){
+			return null;
+		}
+
+		StringBuilder ret = new StringBuilder("<xml xmlns:xlink=\"http://www.w3.org/1999/xlink\">");
+		ret.append("<numberOfPages>").append(pageFiles.length).append("</numberOfPages><pages>");
+		for (File file: pageFiles){
+			ret.append("<page xlink:href=\"tickets/"+ ticket+"/"+lang+"/"+textId  +"/" +file.getName()+"\">"+file.getName()+"</page>");
+		}
+		return ret.append("</pages></xml>").toString();
+	}
+	
+	/**
+	 * Returns the content of one page file of the ticket, each of its
+	 * lines wrapped in a line element.
+	 *
+	 * @return the XML string, or null if the page file does not exist
+	 */
+	public static String getHitsOnPage(String ticket, String lang,
+			String textId, String restPath) throws FileNotFoundException, IOException {
+		final File pageFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId+PATHSEPARATOR+restPath);
+		if (!pageFile.exists()){
+			return null;
+		}
+		final StringBuilder xml = new StringBuilder("<xml>");
+		for (String hitLine: readFileToString(pageFile).split("\n")){
+			xml.append("<line>").append(hitLine).append("</line>");
+		}
+		return xml.append("</xml>").toString();
+	}
+
+	/** Reads the whole file and decodes it as UTF-8; the stream is always closed. */
+	private static String readFileToString(File lastChangeFile)
+			throws FileNotFoundException, IOException {
+		byte[] buffer = new byte[(int) lastChangeFile.length()];
+		java.io.DataInputStream f = new java.io.DataInputStream(new FileInputStream(lastChangeFile));
+		try { f.readFully(buffer); } finally { f.close(); } // readFully: a plain read() may stop early
+		return new String(buffer,"utf-8");
+	}
+
+	/**
+	 * Tells whether a ticket is closed, i.e. whether its lastChange file
+	 * contains exactly "closed".
+	 *
+	 * @return false also if the ticket or the lastChange file is missing or
+	 *         cannot be read
+	 */
+	public static boolean isClosed(String ticket) {
+		File ticketFolder = new File(TICKET_PATH+PATHSEPARATOR+ticket);
+		if (!ticketFolder.exists()){
+			return false;
+		}
+		File changeFile = new File(ticketFolder.getAbsoluteFile()+PATHSEPARATOR+"lastChange");
+		if (!changeFile.exists()){
+			return false;
+		}
+		try {
+			return "closed".equals(readFileToString(changeFile));
+		} catch (IOException e) {
+			// covers FileNotFoundException as well: an unreadable ticket counts as not closed
+			return false;
+		}
+	}
+	
+	/**
+	 * Tells whether an entry named like the ticket exists below TICKET_PATH.
+	 */
+	public static boolean exists(String ticket) {
+		final String ticketPath = TICKET_PATH+PATHSEPARATOR+ticket;
+		return new File(ticketPath).exists();
+	}
+	/** Lists the names of all entries of the ticket folder as XML (empty list if the folder is missing). */
+	public static String getAllTickets() {
+		// list() returns null if TICKET_PATH does not exist or is not a directory
+		String[] tickets = new File(TICKET_PATH).list();
+		StringBuilder ret = new StringBuilder("<xml><tickets>");
+		for (int i = 0; tickets != null && i < tickets.length; i++){
+			ret.append("<ticket><id>").append(tickets[i]).append("</id></ticket>");
+		}
+		return ret.append("</tickets></xml>").toString();
+	}
+	
+	
+
+	/** Command line entry point: "-clean minutes" deletes all tickets older than minutes. */
+	public static void main(String[] args){
+		final String usage = "Usage: TicketWriter -clean minutes, loescht alle tickets, die aelter als minutes sind.";
+		if (args.length==0){
+			 System.out.println(usage); 
+			 System.exit(1);
+		}
+		if (!args[0].equals("-clean")){
+			return; // any other first argument is ignored
+		}
+		if (args.length<2){
+			 System.out.println(usage); 
+			 System.exit(1);
+		}
+		try {
+			cleanTickets(args[1]);
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+
+	/** Deletes every ticket folder whose lastChange file is older than the given number of minutes. */
+	private static void cleanTickets(String minutes) throws IOException {
+		long maxAgeMillis = Long.parseLong(minutes)*60*1000;
+		File[] tickets = new File(TICKET_PATH).listFiles();
+		if (tickets == null){
+			return; // ticket folder missing or not a directory: nothing to clean
+		}
+		long now = System.currentTimeMillis();
+		for (File ticket:tickets){
+			if (!ticket.isDirectory()){
+				continue; // only folders are handed to FileUtils.deleteDirectory
+			}
+			File lmFile = new File(ticket.getAbsolutePath()+PATHSEPARATOR+"lastChange");
+			// lastModified() is 0 for a missing file, so such folders count as old
+			if (now-lmFile.lastModified() > maxAgeMillis){
+				FileUtils.deleteDirectory(ticket);
+			}
+		}
+	}
+
+
+
+
+
+}
+
+	
+