Mercurial > hg > fulltextSearch
view src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java @ 2:2b29b0b6db16 default tip
Version mit integrierter Suche über XML-Volltexte
author | dwinter |
---|---|
date | Wed, 26 Jan 2011 14:41:09 +0100 |
parents | 72a015318a6d |
children |
line wrap: on
line source
package de.mpiwg.dwinter.fulltext.ticket; import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.nio.CharBuffer; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import org.apache.commons.io.FileUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; import org.jdom.output.XMLOutputter; import org.jdom.xpath.XPath; import com.sun.org.apache.xpath.internal.operations.Bool; import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc; /** * @author dwinter * * Schreibt die Ergebnisse einer Suche in ein Ticket */ public class TicketWriter { protected static Logger logger = Logger.getRootLogger(); private static final String TICKET_PATH = "/tmp/ticketfolder"; public static final String PATHSEPARATOR = "/"; public static final char PATHSEPARATORCHAR = '/'; public File ticketFile; private Map<String,LanguageWriter> languageFolders = new HashMap<String,LanguageWriter>(); private String ticket; private Boolean ticketOpen = false; private HashMap<String,Boolean> ticketOpenLangs = new HashMap<String,Boolean>(); private File lastChangeFile; /** Erzeuge TicketWriter fuer ein Ticker * @param ticket * @throws IOException */ public TicketWriter(String ticket) throws IOException { this.ticket = ticket; ticketOpen = true; ticketFile = 
new File(TICKET_PATH+PATHSEPARATOR+ticket); if(!ticketFile.exists()){ logger.debug("Create Ticketfolder:"+ticketFile.getCanonicalPath()); if(!ticketFile.mkdirs()) throw new IOException(); } lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange"); commitTicket(); } public TicketWriter(String ticket, Query query) throws IOException { this(ticket); saveQuery(query); } public TicketWriter(String ticket, Query query,ArrayList<String> languages) throws IOException { this(ticket); saveQuery(query); for (String lang: languages){ ticketOpenLangs.put(lang, true); } } public TicketWriter(String ticket, Query query, String language) throws IOException { this(ticket); saveQuery(query); LanguageWriter lw = new LanguageWriter(ticket, language); lw.saveQuery(query); } public void saveQuery(Query query) throws IOException { File qf = new File(ticketFile+PATHSEPARATOR+"query"); FileWriter fw = new FileWriter(qf); fw.write(query.toString()); fw.close(); } /** Sichert die Suchergebnisse fźr ein Ticket und eine Sprache. * @param language * @param docs * @param reader * @throws IOException */ public void writeResultsForLanguageSearch(String language, TopDocs docs, IndexReader reader) throws IOException { LanguageWriter lw = new LanguageWriter(ticket, language); Integer counter= lw.writeResults(docs, reader); lw.writeResultInfo(docs,counter); } /** Gibt bekannt, dass sich ein Ticket geaendert hat. 
* @param ticket * @throws IOException */ public void commitTicket() throws IOException { FileWriter lastChangeFw = new FileWriter(lastChangeFile); Date currentDate = new java.util.Date(); DateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss"); lastChangeFw.write(dateFormat.format(currentDate)); lastChangeFw.close(); } /**Schreibe ein Ergebnis in das Ticket * @param language * @param ocrDoc * @throws IOException */ public void writeDoc(String language, OCRDoc ocrDoc) throws IOException { LanguageWriter lw = new LanguageWriter(ticket, language); lw.writeDoc(ocrDoc); } /** * Ticket closed * @throws IOException */ public void closeTicket() throws IOException { ticketOpen=false; FileWriter lastChangeFw = new FileWriter(lastChangeFile); lastChangeFw.write("closed"); lastChangeFw.close(); } /** * @param language * close a ticket for a language * @throws IOException */ public void closeTicket(String language) throws IOException { String filename = lastChangeFile.getAbsolutePath()+"_"+language; FileWriter lastChangeFw = new FileWriter(new File(filename)); lastChangeFw.write("closed"); lastChangeFw.close(); ticketOpenLangs.put(language, false); // gehe durch alle sprache und schliesse das gesamte ticket falls alle tickets fuer die sprachen geschlossen sind Boolean close = true; for (String lang: ticketOpenLangs.keySet()){ if (ticketOpenLangs.get(lang)) close = false; } if (close){ // kein ticket mehr offen dann closeTicket(); } }// TODO Auto-generated method stub static public String getNewTicket(){ Random rd = new Random(); String tnr = String.valueOf(rd.nextInt()); File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+tnr); while (ticketFile.exists()){ tnr = String.valueOf(Math.abs(rd.nextInt())); ticketFile = new File(TICKET_PATH+PATHSEPARATOR+tnr); } return tnr; } public static String getXML(String ticket) throws FileNotFoundException, IOException { String ret="<ticket><number>"+ticket+"</number>"; File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); if 
(!ticketFile.exists()){ return null; }else{ File lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange"); if (!lastChangeFile.exists()){ return null; }else{ ret += "<lastChanged>"+readFileToString(lastChangeFile)+"</lastChanged>"; } File queryFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"query"); if (!queryFile.exists()){ return null; }else{ ret += "<query>"+readFileToString(queryFile)+"</query>"; } //gehe jetzt durch das directory alle directory dort sind sprachen abhaengige resultsets for (File file: ticketFile.listFiles()){ if(file.isDirectory()){ ret+="<lang>"+file.getName()+"</lang>"; } } ret +="</ticket>"; } return ret; } /** * Hole das Ergebnis des Tickets in der entsprechenden Sprachen * * @param ticket * @param lang * @return * @throws FileNotFoundException * @throws IOException */ public static String getResultXML(String ticket, String lang) throws FileNotFoundException, IOException { String ret="<resultSet lang=\""+lang+"\" queryString=\""+getQueryString(lang,ticket)+"\"><number>"+ticket+"</number>"; File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); if (!ticketFile.exists()){ return null; }else{ File resultFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"result"); if (!resultFile.exists()){ return null; }else{ ret += "<results>"+readFileToString(resultFile)+"</results>"; } ret +="</resultSet>"; } return ret; } /** * Hole die Ergebnisse der entsprechende Sprache aus dem Ticket, * eingeschrŠnkt auf durch from to angegebenen Datensaetze. * Achtung: der erste Datensatz hat die Nummer 0. 
* @param ticket * @param lang * @param from * @param to * @return * @throws IOException * @throws FileNotFoundException * @throws JDOMException */ public static String getResultXML(String ticket, String lang, Integer from, Integer to) throws FileNotFoundException, IOException, JDOMException { String ret = getResultXML(ticket, lang); String retresult="<resultSet lang=\""+lang+"\" queryString=\""+getQueryString(lang,ticket)+"\"><number>"+ticket+"</number><results>"; SAXBuilder builder = new SAXBuilder(); InputStream stream = new ByteArrayInputStream(ret.getBytes("utf-8")); Document doc = builder.build(stream); XPath xpath = XPath.newInstance("//result"); List<Element> resultsElements = xpath.selectNodes(doc); if (resultsElements.size()>(from-1)) {// zaehlweise, der erste Datensatz hat die Nummer 0 to = Math.min(to, resultsElements.size()-1); for(int i =from;i<=to;i++){ Element el = resultsElements.get(i); String str = (new XMLOutputter()).outputString(el); retresult+=str; } } return retresult+ "</results></resultSet>"; } public static String getResultInfoXML(String ticket, String lang) throws IOException { String ret=""; File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); if (!ticketFile.exists()){ return null; }else{ File resultFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"resultInfo"); if (!resultFile.exists()){ return null; }else{ ret += readFileToString(resultFile); } return ret; } } private static String getQueryString(String ticket) throws FileNotFoundException, IOException { String ticketString = TICKET_PATH+PATHSEPARATOR+ticket; File qf = new File(ticketString+PATHSEPARATOR+"query"); return readFileToString(qf); } public static String getQueryString(String lang,String ticket) throws FileNotFoundException, IOException { String ticketString = TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang; File qf = new File(ticketString+PATHSEPARATOR+"query"); return readFileToString(qf); } public static String 
getPagesWithHits(String ticket, String lang, String textId) { String ret="<xml xmlns:xlink=\"http://www.w3.org/1999/xlink\">"; File textIdFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId); if (!textIdFile.exists()){ return null; }else{ //String url = String.format(DIGIVIEWBASICSTRING, generateImagePath(textId,pageFileName),generateMarksFromPoints(points)); int pages = textIdFile.listFiles().length; ret+="<numberOfPages>"+String.valueOf(pages)+"</numberOfPages><pages>"; for (File file: textIdFile.listFiles()){ ret+="<page xlink:href=\"tickets/"+ ticket+"/"+lang+"/"+textId +"/" +file.getName()+"\">"+file.getName()+"</page>"; } ret +="</pages></xml>"; } return ret; } public static String getHitsOnPage(String ticket, String lang, String textId, String restPath) throws FileNotFoundException, IOException { String ret="<xml>"; File pageFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId+PATHSEPARATOR+restPath); if (!pageFile.exists()){ return null; }else{ String fl = readFileToString(pageFile); String[] splitted = fl.split("\n"); for (String lin:splitted){ ret+="<line>"+lin+"</line>"; } ret +="</xml>"; } return ret; } private static String readFileToString(File lastChangeFile) throws FileNotFoundException, IOException { byte[] buffer = new byte[(int) lastChangeFile.length()]; BufferedInputStream f = new BufferedInputStream(new FileInputStream(lastChangeFile)); f.read(buffer); return new String(buffer,"utf-8"); } public static boolean isClosed(String ticket) { File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); if (!ticketFile.exists()){ return false; }else{ File lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange"); if (!lastChangeFile.exists()){ return false; }else{ try { if (readFileToString(lastChangeFile).equals("closed")) return true; } catch (FileNotFoundException e) { return false; } catch (IOException e) { return false; } } return false; } } public 
static boolean exists(String ticket) { File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket); return ticketFile.exists(); } public static String getAllTickets() { File ticketFolder = new File(TICKET_PATH); String[] tickets = ticketFolder.list(); String ret="<xml><tickets>"; for (String ticket: tickets){ ret+="<ticket><id>"+ticket+"</id></ticket>"; } ret +="</tickets></xml>"; return ret; } public static void main(String[] args){ if (args.length==0){ System.out.println("Usage: TicketWriter -clean minutes, loescht alle tickets, die aelter als minutes sind."); System.exit(1); } if (args[0].equals("-clean")){ if (args.length<2){ System.out.println("Usage: TicketWriter -clean minutes, loescht alle tickets, die aelter als minutes sind."); System.exit(1); } try { cleanTickets(args[1]); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } private static void cleanTickets(String minutes) throws IOException { File ticketFolder = new File(TICKET_PATH); File[] tickets = ticketFolder.listFiles(); for (File ticket:tickets){ File lmFile = new File(ticket.getAbsolutePath()+PATHSEPARATOR+"lastChange"); long time = lmFile.lastModified(); long currentime = System.currentTimeMillis(); long diff = currentime-time; if (diff > Long.valueOf(minutes)*60*1000){ FileUtils.deleteDirectory(ticket); } } } public static String getFileContent(String ticket, String lang, String textId, String pageFileName) throws FileNotFoundException, IOException { File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId+PATHSEPARATOR+pageFileName); return readFileToString(ticketFile); } }