# HG changeset patch # User dwinter # Date 1288783580 -3600 # Node ID 72a015318a6dcbd3841a2ddd6db6141e7f982a5f CLOSED - # 16: Zeige nur eine konfigurierbare Anzahl von Treffern an. https://it-dev.mpiwg-berlin.mpg.de/tracs/pythonOcropusTools/ticket/16 diff -r 000000000000 -r 72a015318a6d src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltext/search/FulltextSearchDocsLines.java Wed Nov 03 12:26:20 2010 +0100 @@ -0,0 +1,272 @@ +package de.mpiwg.dwinter.fulltext.search; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.apache.lucene.analysis.de.GermanAnalyzer; +import org.apache.lucene.analysis.fr.FrenchAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopScoreDocCollector; +import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.util.Version; + +import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc; +import de.mpiwg.dwinter.fulltext.search.utils.OCRLine; +import de.mpiwg.dwinter.fulltext.searcher.LanguageSearcher; +import de.mpiwg.dwinter.fulltext.searcher.LanguageSearchers; +import de.mpiwg.dwinter.fulltext.ticket.TicketWriter; + +public class FulltextSearchDocsLines extends FulltextSearch { + + protected static Logger logger = Logger.getRootLogger(); + + private static final int MAX_LINES = 10000; + + protected File line_index_dir; // Index mit dem Zeilenindex + + public LanguageSearchers languageSearchersLines = new LanguageSearchers(); + + public FulltextSearchDocsLines(File index_dir, File line_index_dir) + throws CorruptIndexException, LockObtainFailedException, + IOException { + + super(index_dir); + this.line_index_dir = line_index_dir; + init_language_searchers_lines(line_index_dir); + + } + + protected void init_language_searchers_lines(File dir) + throws CorruptIndexException, LockObtainFailedException, + IOException { + languageSearchersLines.add(new LanguageSearcher("de", + new GermanAnalyzer(Version.LUCENE_30), dir)); + languageSearchersLines.add(new LanguageSearcher("en", + new StandardAnalyzer(Version.LUCENE_30), dir)); + languageSearchersLines.add(new LanguageSearcher("fr", + new FrenchAnalyzer(Version.LUCENE_30), dir)); + languageSearchersLines.add(new LanguageSearcher("all", + new StandardAnalyzer(Version.LUCENE_30), dir)); + languageSearchersLines.add(new LanguageSearcher("morph", + new StandardAnalyzer(Version.LUCENE_30), dir)); + languageSearchersLines.add(new LanguageSearcher("la", + new StandardAnalyzer(Version.LUCENE_30), dir)); + } + + /** + * Erzeugt Ergebnisliste im Filesystem fuer die Weiterbenutzung źbers + * Servlet + * + * @param query + * @param language + * @param ticket + * ticket unter dem auf die Daten zurźckgegriffen werden soll. + * @return + * @throws IOException + */ + public void searchInLinesToDir(Query query, String language, String ticket) + throws IOException { + + // first step search docs + logger.debug("Start searching docs"); + TopScoreDocCollector col = (TopScoreDocCollector) search(query, + language); + TopDocs docs = col.topDocs(); + ScoreDoc[] scoreDocs = docs.scoreDocs; + // ArrayList ocrDocs = new ArrayList(); + + TicketWriter tw = new TicketWriter(ticket, query, language); + + LanguageSearcher searcher = languageSearchers + .getSearcherByLanguage(language); + logger.debug("Start writing docs"); + tw.writeResultsForLanguageSearch(language, docs, searcher.reader); + tw.commitTicket(); + logger.debug("Wrote docs"); + LanguageSearcher lineSearcher = languageSearchersLines + .getSearcherByLanguage(language); + + Set textIds = new HashSet(); + + for (ScoreDoc doc : scoreDocs) { + Document d = searcher.reader.document(doc.doc); + String textID = d.get("textId"); + logger.debug("Start:" + textID); + + // teste ob schon gesucht TODO: warum sind manchmal textid mehrfach + // in der treffer liste? + if (!textIds.contains(textID)) { + textIds.add(textID); + + Query textIDQuery = new TermQuery(new Term("textId", textID)); + //Query[] queries = new Query[] { query, textIDQuery }; + //Query lineQuery = query.combine(queries); + + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(textIDQuery, BooleanClause.Occur.MUST); + booleanQuery.add(query, BooleanClause.Occur.MUST); + // suche jetzt die Zeilen + TopScoreDocCollector lineCol = TopScoreDocCollector.create( + MAX_LINES, false); + lineSearcher.searcher.search(booleanQuery, lineCol); + logger.debug("Searched:" + textID); + OCRDoc ocrDoc = new OCRDoc(); + ocrDoc.docId = doc.doc; + ocrDoc.document = d; + ocrDoc.textId = d.get("textId"); + + Map> ocrPages = new HashMap>(); + + for (ScoreDoc line : lineCol.topDocs().scoreDocs) { + // fuege alle zeile zusammen + OCRLine ocrLine = new OCRLine(); + Document lineD = lineSearcher.reader.document(line.doc); + ocrLine.pageDimension = lineD.get("pageDimension"); + ocrLine.bbox = lineD.get("bbox"); + ocrLine.lineNumber = lineD.get("lineNumber"); + + String pageNumber = getPageName(lineD.get("cleanedPath")); + + if (!ocrPages.containsKey(pageNumber)) { + ocrPages.put(pageNumber, new ArrayList()); + } + + ArrayList page = ocrPages.get(pageNumber); + page.add(ocrLine); + + } + logger.debug("collected:" + textID); + ocrDoc.linesInPage = ocrPages; + tw.writeDoc(language, ocrDoc); + tw.commitTicket(); + logger.debug("written:" + textID); + } else { + logger.debug("already done:" + textID); + } + } + tw.closeTicket(language); + logger.debug("everything done!"); + } + + + public OCRDoc searchInLinesDoc(String textId,Query query, String language) throws IOException{ + Query textIDQuery = new TermQuery(new Term("textId", textId)); + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(textIDQuery, BooleanClause.Occur.MUST); + booleanQuery.add(query, BooleanClause.Occur.MUST); + + List docs = searchInLines(booleanQuery, language); + + if (docs.size()==0) + return new OCRDoc(); + else + return docs.get(0); + + + } + public List searchInLines(Query query, String language) + throws IOException { + + // first step search docs + logger.debug("Start searching docs."); + + TopScoreDocCollector col = (TopScoreDocCollector) search(query, + language); + TopDocs docs = col.topDocs(); + ScoreDoc[] scoreDocs = docs.scoreDocs; + ArrayList ocrDocs = new ArrayList(); + + LanguageSearcher searcher = languageSearchers + .getSearcherByLanguage(language); + LanguageSearcher lineSearcher = languageSearchersLines + .getSearcherByLanguage(language); + + logger.debug("found docs."); + Set textIds = new HashSet(); + for (ScoreDoc doc : scoreDocs) { + Document d = searcher.reader.document(doc.doc); + String textID = d.get("textId"); + + if (!textIds.contains(textID)) { + textIds.add(textID); + + Query textIDQuery = new TermQuery(new Term("textId", textID)); + //Query[] queries = new Query[] { query, textIDQuery }; + //Query lineQuery = query.combine(queries); + + BooleanQuery booleanQuery = new BooleanQuery(); + booleanQuery.add(textIDQuery, BooleanClause.Occur.MUST); + booleanQuery.add(query, BooleanClause.Occur.MUST); + + + // suche jtzt die Zeilen + TopScoreDocCollector lineCol = TopScoreDocCollector.create( + MAX_LINES, false); + lineSearcher.searcher.search(booleanQuery, lineCol); + logger.debug("Searched:" + textID); + OCRDoc ocrDoc = new OCRDoc(); + ocrDoc.docId = doc.doc; + ocrDoc.document = d; + + Map> ocrPages = new HashMap>(); + + for (ScoreDoc line : lineCol.topDocs().scoreDocs) { + // fuege alle zeile zusammen + OCRLine ocrLine = new OCRLine(); + Document lineD = lineSearcher.reader.document(line.doc); + ocrLine.pageDimension = lineD.get("pageDimension"); + ocrLine.bbox = lineD.get("bbox"); + ocrLine.lineNumber = lineD.get("lineNumber"); + + String pageNumber = getPageName(lineD.get("cleanedPath")); + logger.debug("collect:" + pageNumber); + if (!ocrPages.containsKey(pageNumber)) { + ocrPages.put(pageNumber, new ArrayList()); + } + + ArrayList page = ocrPages.get(pageNumber); + page.add(ocrLine); + } + logger.debug("collected:" + textID); + ocrDoc.linesInPage = ocrPages; + ocrDocs.add(ocrDoc); + } else { + logger.debug("already done:" + textID); + } + } + + return ocrDocs; + } + + /** + * Gibt aus dem Pfad denDateinamen zurueck, der dann als Seitenname benutzt + * wird. + * + * @param path + * @return + */ + private String getPageName(String path) { + File f = new File(path); + + return f.getName(); + } + +} diff -r 000000000000 -r 72a015318a6d src/de/mpiwg/dwinter/fulltext/ticket/LanguageWriter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltext/ticket/LanguageWriter.java Wed Nov 03 12:26:20 2010 +0100 @@ -0,0 +1,130 @@ +package de.mpiwg.dwinter.fulltext.ticket; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.document.Document; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopScoreDocCollector; + +import sun.security.krb5.internal.PAEncTSEnc; + +import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc; +import de.mpiwg.dwinter.fulltext.search.utils.OCRLine; +import de.mpiwg.dwinter.lucencetools.documents.FileDocument; + +public class LanguageWriter extends TicketWriter{ + + private File languageFile; + private String language; + + public LanguageWriter(String ticket, String language) throws IOException{ + + super(ticket); + + languageFile = new File(ticketFile.getAbsolutePath()+PATHSEPARATOR+language); + if(!languageFile.exists()){ + logger.debug("Create Languagefolder:"+languageFile.getCanonicalPath()); + if(!languageFile.mkdirs()) + throw new IOException(); + } + + this.language=language; + } + + /** Schreibt das Ergebnis einer Suche in eine Datei im Ticket. + * @param docs + * @param reader + * @return Anzahl der tatsaechlich geschriebenen Treffer + * @throws CorruptIndexException + * @throws IOException + */ + public Integer writeResults(TopDocs docs, IndexReader reader) throws CorruptIndexException, IOException { + File resultFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"result"); + FileOutputStream fs = new FileOutputStream(resultFile); + OutputStreamWriter rw = new OutputStreamWriter(fs,"utf-8"); + + Set idsAlreadyDone = new HashSet(); // TODO: aus irgendwelche gruenden gibt es ein Dokument mehrfach in den Fundstellen + + //FileWriter rw = new FileWriter(resultFile); + + if(!languageFile.exists()){ + logger.debug("Create Languagefolder:"+languageFile.getCanonicalPath()); + if(!languageFile.mkdirs()) + throw new IOException(); + } + Integer counter =0; + for (ScoreDoc sd:docs.scoreDocs) + { + Document d = reader.document(sd.doc); + + String id = d.get("textId"); + if (!idsAlreadyDone.contains(id)){ // stelle sicher das alle treffer nur einmal in die date geschrieben werden. + idsAlreadyDone.add(id); + rw.write(FileDocument.toXML(d)+"\n"); + counter ++; + } + } + rw.close(); + return counter; + } + + + public void writeResultInfo(TopDocs docs, Integer counter) throws IOException { + File resultFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"resultInfo"); + FileOutputStream fs = new FileOutputStream(resultFile); + OutputStreamWriter rw = new OutputStreamWriter(fs,"utf-8"); + String ret = ""; + int hits = docs.totalHits; + ret +=""+language+""; + ret +=""+counter+""; + ret +=""+hits+""; + + ret+= ""; + rw.write(ret); + rw.close(); + } + + + public void writeDoc(OCRDoc ocrDoc) throws IOException { + // erzeuge fuer jedes document einen ordner + + File docFile = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+ocrDoc.textId.replace(PATHSEPARATORCHAR, ':')); + if(!docFile.exists()){ + logger.debug("Create Docfolder:"+docFile.getCanonicalPath()); + if(!docFile.mkdirs()) + throw new IOException(); + } + + // jetzt fuer jese seite ein file + for (String page:ocrDoc.linesInPage.keySet()){ + File pageFile = new File(docFile.getAbsolutePath()+PATHSEPARATOR+page.replace(PATHSEPARATORCHAR, ':')); + FileWriter pageFileWriter = new FileWriter(pageFile); + + for (OCRLine line: ocrDoc.linesInPage.get(page)){ + pageFileWriter.write(line.toString()+"\n"); + } + pageFileWriter.close(); + } + } + + public void saveQuery(Query query) throws IOException { + + File qf = new File(languageFile.getAbsolutePath()+PATHSEPARATOR+"query"); + FileWriter fw = new FileWriter(qf); + fw.write(query.toString()); + fw.close(); + } + +} diff -r 000000000000 -r 72a015318a6d src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java Wed Nov 03 12:26:20 2010 +0100 @@ -0,0 +1,502 @@ +package de.mpiwg.dwinter.fulltext.ticket; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.nio.CharBuffer; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopScoreDocCollector; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; +import org.jdom.output.XMLOutputter; +import org.jdom.xpath.XPath; + +import com.sun.org.apache.xpath.internal.operations.Bool; + + + +import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc; + +/** + * @author dwinter + * + * Schreibt die Ergebnisse einer Suche in ein Ticket + */ +public class TicketWriter { + + protected static Logger logger = Logger.getRootLogger(); + + private static final String TICKET_PATH = "/tmp/ticketfolder"; + + protected static final String PATHSEPARATOR = "/"; + protected static final char PATHSEPARATORCHAR = '/'; + protected File ticketFile; + private Map languageFolders = new HashMap(); + + private String ticket; + private Boolean ticketOpen = false; + private HashMap ticketOpenLangs = new HashMap(); + private File lastChangeFile; + + + /** Erzeuge TicketWriter fuer ein Ticker + * @param ticket + * @throws IOException + */ + public TicketWriter(String ticket) throws IOException { + + + this.ticket = ticket; + ticketOpen = true; + ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); + if(!ticketFile.exists()){ + logger.debug("Create Ticketfolder:"+ticketFile.getCanonicalPath()); + if(!ticketFile.mkdirs()) + throw new IOException(); + + } + + lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange"); + + commitTicket(); + + } + + public TicketWriter(String ticket, Query query) throws IOException { + this(ticket); + saveQuery(query); + + + } + + public TicketWriter(String ticket, Query query,ArrayList languages) throws IOException { + this(ticket); + saveQuery(query); + for (String lang: languages){ + ticketOpenLangs.put(lang, true); + } + + } + + + public TicketWriter(String ticket, Query query, String language) throws IOException { + this(ticket); + saveQuery(query); + + LanguageWriter lw = new LanguageWriter(ticket, language); + lw.saveQuery(query); + } + + public void saveQuery(Query query) throws IOException { + + File qf = new File(ticketFile+PATHSEPARATOR+"query"); + FileWriter fw = new FileWriter(qf); + fw.write(query.toString()); + fw.close(); + } + + /** Sichert die Suchergebnisse fźr ein Ticket und eine Sprache. + * @param language + * @param docs + * @param reader + * @throws IOException + */ + public void writeResultsForLanguageSearch(String language, TopDocs docs, IndexReader reader) throws IOException { + LanguageWriter lw = new LanguageWriter(ticket, language); + Integer counter= lw.writeResults(docs, reader); + lw.writeResultInfo(docs,counter); + + } + + /** Gibt bekannt, dass sich ein Ticket geaendert hat. + * @param ticket + * @throws IOException + */ + public void commitTicket() throws IOException { + + FileWriter lastChangeFw = new FileWriter(lastChangeFile); + + Date currentDate = new java.util.Date(); + DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); + lastChangeFw.write(dateFormat.format(currentDate)); + lastChangeFw.close(); + + + } + + /**Schreibe ein Ergebnis in das Ticket + * @param language + * @param ocrDoc + * @throws IOException + */ + public void writeDoc(String language, OCRDoc ocrDoc) throws IOException { + LanguageWriter lw = new LanguageWriter(ticket, language); + lw.writeDoc(ocrDoc); + + } + + /** + * Ticket closed + * @throws IOException + */ + public void closeTicket() throws IOException { + ticketOpen=false; + FileWriter lastChangeFw = new FileWriter(lastChangeFile); + lastChangeFw.write("closed"); + lastChangeFw.close(); + + + }// TODO Auto-generated method stub + + + /** + * @param language + * close a ticket for a language + * @throws IOException + */ + public void closeTicket(String language) throws IOException { + String filename = lastChangeFile.getAbsolutePath()+"_"+language; + FileWriter lastChangeFw = new FileWriter(new File(filename)); + lastChangeFw.write("closed"); + lastChangeFw.close(); + ticketOpenLangs.put(language, false); + // gehe durch alle sprache und schliesse das gesamte ticket falls alle tickets fuer die sprachen geschlossen sind + + Boolean close = true; + for (String lang: ticketOpenLangs.keySet()){ + if (ticketOpenLangs.get(lang)) + close = false; + } + + if (close){ // kein ticket mehr offen dann + closeTicket(); + } + + }// TODO Auto-generated method stub + + + + + static public String getNewTicket(){ + Random rd = new Random(); + String tnr = String.valueOf(rd.nextInt()); + + File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+tnr); + while (ticketFile.exists()){ + tnr = String.valueOf(Math.abs(rd.nextInt())); + ticketFile = new File(TICKET_PATH+PATHSEPARATOR+tnr); + } + + return tnr; + } + + public static String getXML(String ticket) throws FileNotFoundException, IOException { + String ret=""+ticket+""; + File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); + if (!ticketFile.exists()){ + return null; + }else{ + File lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange"); + if (!lastChangeFile.exists()){ + return null; + }else{ + ret += ""+readFileToString(lastChangeFile)+""; + + } + File queryFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"query"); + if (!queryFile.exists()){ + return null; + }else{ + ret += ""+readFileToString(queryFile)+""; + + } + + //gehe jetzt durch das directory alle directory dort sind sprachen abhaengige resultsets + for (File file: ticketFile.listFiles()){ + if(file.isDirectory()){ + ret+=""+file.getName()+""; + } + } + ret +=""; + } + return ret; + } + + + + + + + + /** + * Hole das Ergebnis des Tickets in der entsprechenden Sprachen + * + * @param ticket + * @param lang + * @return + * @throws FileNotFoundException + * @throws IOException + */ + public static String getResultXML(String ticket, String lang) throws FileNotFoundException, IOException { + String ret=""+ticket+""; + File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); + if (!ticketFile.exists()){ + return null; + }else{ + File resultFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"result"); + if (!resultFile.exists()){ + return null; + }else{ + ret += ""+readFileToString(resultFile)+""; + + } + ret +=""; + } + return ret; + } + + + /** + * Hole die Ergebnisse der entsprechende Sprache aus dem Ticket, + * eingeschrŠnkt auf durch from to angegebenen Datensaetze. + * Achtung: der erste Datensatz hat die Nummer 0. + * @param ticket + * @param lang + * @param from + * @param to + * @return + * @throws IOException + * @throws FileNotFoundException + * @throws JDOMException + */ + + public static String getResultXML(String ticket, String lang, + Integer from, Integer to) throws FileNotFoundException, IOException, JDOMException { + String ret = getResultXML(ticket, lang); + + String retresult=""+ticket+""; + + SAXBuilder builder = new SAXBuilder(); + InputStream stream = new ByteArrayInputStream(ret.getBytes("utf-8")); + Document doc = builder.build(stream); + XPath xpath = XPath.newInstance("//result"); + List resultsElements = xpath.selectNodes(doc); + if (resultsElements.size()>(from-1)) {// zaehlweise, der erste Datensatz hat die Nummer 0 + to = Math.min(to, resultsElements.size()-1); + for(int i =from;i<=to;i++){ + Element el = resultsElements.get(i); + String str = (new XMLOutputter()).outputString(el); + retresult+=str; + + } + } + + + + return retresult+ ""; + } + + + public static String getResultInfoXML(String ticket, String lang) throws IOException { + String ret=""; + File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); + if (!ticketFile.exists()){ + return null; + }else{ + File resultFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"resultInfo"); + if (!resultFile.exists()){ + return null; + }else{ + ret += readFileToString(resultFile); + + } + return ret; + } + } + + + + + private static String getQueryString(String ticket) throws FileNotFoundException, IOException { + String ticketString = TICKET_PATH+PATHSEPARATOR+ticket; + File qf = new File(ticketString+PATHSEPARATOR+"query"); + + return readFileToString(qf); + } + + private static String getQueryString(String lang,String ticket) throws FileNotFoundException, IOException { + String ticketString = TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang; + File qf = new File(ticketString+PATHSEPARATOR+"query"); + + return readFileToString(qf); + } + + public static String getPagesWithHits(String ticket, String lang, + String textId) { + String ret=""; + File textIdFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId); + if (!textIdFile.exists()){ + return null; + }else{ + + //String url = String.format(DIGIVIEWBASICSTRING, generateImagePath(textId,pageFileName),generateMarksFromPoints(points)); + + + + + int pages = textIdFile.listFiles().length; + ret+=""+String.valueOf(pages)+""; + for (File file: textIdFile.listFiles()){ + ret+=""+file.getName()+""; + } + + ret +=""; + } + return ret; + } + + public static String getHitsOnPage(String ticket, String lang, + String textId, String restPath) throws FileNotFoundException, IOException { + String ret=""; + File pageFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId+PATHSEPARATOR+restPath); + if (!pageFile.exists()){ + return null; + }else{ + + String fl = readFileToString(pageFile); + String[] splitted = fl.split("\n"); + for (String lin:splitted){ + ret+=""+lin+""; + } + ret +=""; + } + return ret; + } + + private static String readFileToString(File lastChangeFile) + throws FileNotFoundException, IOException { + byte[] buffer = new byte[(int) lastChangeFile.length()]; + BufferedInputStream f = new BufferedInputStream(new FileInputStream(lastChangeFile)); + f.read(buffer); + + return new String(buffer,"utf-8"); + } + + public static boolean isClosed(String ticket) { + File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); + if (!ticketFile.exists()){ + return false; + }else{ + File lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange"); + if (!lastChangeFile.exists()){ + return false; + }else{ + try { + if (readFileToString(lastChangeFile).equals("closed")) + return true; + } catch (FileNotFoundException e) { + return false; + } catch (IOException e) { + return false; + } + + } + return false; + } + + } + + public static boolean exists(String ticket) { + File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); + return ticketFile.exists(); + + + + } + public static String getAllTickets() { + File ticketFolder = new File(TICKET_PATH); + String[] tickets = ticketFolder.list(); + String ret=""; + for (String ticket: tickets){ + ret+=""+ticket+""; + } + ret +=""; + return ret; + } + + + + public static void main(String[] args){ + if (args.length==0){ + System.out.println("Usage: TicketWriter -clean minutes, loescht alle tickets, die aelter als minutes sind."); + System.exit(1); + } + + if (args[0].equals("-clean")){ + if (args.length<2){ + System.out.println("Usage: TicketWriter -clean minutes, loescht alle tickets, die aelter als minutes sind."); + System.exit(1); + } + + try { + cleanTickets(args[1]); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } + } + + private static void cleanTickets(String minutes) throws IOException { + File ticketFolder = new File(TICKET_PATH); + File[] tickets = ticketFolder.listFiles(); + for (File ticket:tickets){ + File lmFile = new File(ticket.getAbsolutePath()+PATHSEPARATOR+"lastChange"); + long time = lmFile.lastModified(); + + long currentime = System.currentTimeMillis(); + + long diff = currentime-time; + + if (diff > Long.valueOf(minutes)*60*1000){ + FileUtils.deleteDirectory(ticket); + + } + + } + + } + + + + + +} + + +