view src/de/mpiwg/dwinter/fulltext/ticket/TicketWriter.java @ 2:2b29b0b6db16 default tip

Version mit integrierter Suche über XML-Volltexte
author dwinter
date Wed, 26 Jan 2011 14:41:09 +0100
parents 72a015318a6d
children
line wrap: on
line source

package de.mpiwg.dwinter.fulltext.ticket;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.nio.CharBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;


import org.apache.commons.io.FileUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

import com.sun.org.apache.xpath.internal.operations.Bool;



import de.mpiwg.dwinter.fulltext.search.utils.OCRDoc;

/**
 * @author dwinter
 *
 * Writes the results of a search into a ticket.
 */
public class TicketWriter {

    /** Logger shared by this class; bound to the log4j root logger. */
    protected static Logger logger = Logger.getRootLogger();
    
	/** Base directory below which one sub-directory per ticket is created. */
	private static final String TICKET_PATH = "/tmp/ticketfolder";

	/** Separator used when path strings are concatenated in this class. */
	public static final String PATHSEPARATOR = "/";
	/** Character form of the path separator. */
	public static final char PATHSEPARATORCHAR = '/';
	/** Directory of this ticket (TICKET_PATH + separator + ticket id); set in the constructor. */
	public File ticketFile;
	// NOTE(review): not read or written by any method visible in this class - confirm whether it is still needed.
	private Map<String,LanguageWriter> languageFolders = new HashMap<String,LanguageWriter>();

	/** Identifier of the ticket this writer works on. */
	private String ticket;
	/** Set to true by the constructor and to false by closeTicket(). */
	private Boolean ticketOpen = false;
	/** Open/closed flag per language; the whole ticket is closed once no entry is true. */
	private HashMap<String,Boolean> ticketOpenLangs = new HashMap<String,Boolean>();
	/** The "lastChange" marker file inside the ticket directory. */
	private File lastChangeFile;


	/**
	 * Creates a writer for one ticket. The ticket directory is created below
	 * the ticket base directory if it does not exist yet, and the ticket's
	 * "lastChange" marker is written via {@link #commitTicket()}.
	 *
	 * @param ticket identifier of the ticket; used as the directory name
	 * @throws IOException if the ticket directory cannot be created or the
	 *             "lastChange" marker cannot be written
	 */
	public TicketWriter(String ticket) throws IOException {
		this.ticket = ticket;
		ticketOpen = true;
		ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket);
		if(!ticketFile.exists()){
			logger.debug("Create Ticketfolder:"+ticketFile.getCanonicalPath());
			// mkdirs() also returns false when somebody else created the
			// directory in the meantime; only a still-missing directory is an error.
			if(!ticketFile.mkdirs() && !ticketFile.isDirectory()){
				throw new IOException("Cannot create ticket folder: "+ticketFile.getAbsolutePath());
			}
		}

		lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange");

		commitTicket();
	}

	/**
	 * Creates a writer for one ticket and stores the query in the ticket
	 * directory.
	 *
	 * @param ticket identifier of the ticket
	 * @param query query whose string form is saved with the ticket
	 * @throws IOException if the ticket directory or one of its files cannot be written
	 */
	public TicketWriter(String ticket, Query query) throws IOException {
		this(ticket);
		this.saveQuery(query);
	}
	
	/**
	 * Creates a writer for one ticket, stores the query in the ticket
	 * directory and marks every given language as open.
	 *
	 * @param ticket identifier of the ticket
	 * @param query query whose string form is saved with the ticket
	 * @param languages languages that are registered as open for this ticket
	 * @throws IOException if the ticket directory or one of its files cannot be written
	 */
	public TicketWriter(String ticket, Query query,ArrayList<String> languages) throws IOException {
		this(ticket);
		saveQuery(query);
		// Every requested language starts out as open.
		for (int i = 0; i < languages.size(); i++){
			ticketOpenLangs.put(languages.get(i), Boolean.TRUE);
		}
	}

	
	/**
	 * Creates a writer for one ticket and stores the query both in the
	 * ticket directory and through a {@code LanguageWriter} for the given
	 * language.
	 *
	 * @param ticket identifier of the ticket
	 * @param query query to save
	 * @param language language the query is additionally saved for
	 * @throws IOException if one of the files cannot be written
	 */
	public TicketWriter(String ticket, Query query, String language) throws IOException {
		this(ticket);
		saveQuery(query);

		new LanguageWriter(ticket, language).saveQuery(query);
	}

	/**
	 * Writes the string form of the query to the file "query" inside the
	 * ticket directory, replacing any previous content.
	 *
	 * The file is written as utf-8 because the readers in this class decode
	 * it as utf-8.
	 *
	 * @param query query whose {@code toString()} form is stored
	 * @throws IOException if the file cannot be opened or written
	 */
	public void saveQuery(Query query) throws IOException {
		File queryFile = new File(ticketFile+PATHSEPARATOR+"query");
		OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(queryFile), "utf-8");
		try {
			out.write(query.toString());
		} finally {
			// Release the file handle even when writing fails.
			out.close();
		}
	}

	/**
	 * Stores the search results for this ticket and one language.
	 *
	 * The results are written through a {@code LanguageWriter}; the value it
	 * returns from {@code writeResults} is handed on to
	 * {@code writeResultInfo} (presumably the number of results written -
	 * confirm against LanguageWriter).
	 *
	 * @param language language the results belong to
	 * @param docs hits returned by the search
	 * @param reader index reader passed through to the language writer
	 * @throws IOException if the results cannot be written
	 */
	public void writeResultsForLanguageSearch(String language, TopDocs docs, IndexReader reader) throws IOException {
		final LanguageWriter writer = new LanguageWriter(ticket, language);
		final Integer written = writer.writeResults(docs, reader);
		writer.writeResultInfo(docs, written);
	}

	/**
	 * Announces that the ticket has changed by overwriting its "lastChange"
	 * marker with the current time, formatted as {@code yyyyMMddHHmmss}.
	 *
	 * @throws IOException if the marker file cannot be written
	 */
	public void commitTicket() throws IOException {
		// SimpleDateFormat is not thread-safe, so a fresh instance is used per call.
		DateFormat stampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
		String stamp = stampFormat.format(new Date());

		FileWriter lastChangeFw = new FileWriter(lastChangeFile);
		try {
			lastChangeFw.write(stamp);
		} finally {
			// Release the file handle even when writing fails.
			lastChangeFw.close();
		}
	}

	/**
	 * Writes a single result document into the ticket, delegating to a
	 * {@code LanguageWriter} for the given language.
	 *
	 * @param language language the document belongs to
	 * @param ocrDoc document to write
	 * @throws IOException if the document cannot be written
	 */
	public void writeDoc(String language, OCRDoc ocrDoc) throws IOException {
		new LanguageWriter(ticket, language).writeDoc(ocrDoc);
	}

	/**
	 * Closes the whole ticket: clears the open flag and overwrites the
	 * "lastChange" marker with the text "closed".
	 *
	 * @throws IOException if the marker file cannot be written
	 */
	public void closeTicket() throws IOException {
		ticketOpen=false;
		FileWriter lastChangeFw = new FileWriter(lastChangeFile);
		try {
			lastChangeFw.write("closed");
		} finally {
			// Release the file handle even when writing fails.
			lastChangeFw.close();
		}
	}
		
	
	/**
	 * Closes the ticket for one language. A marker file named after the
	 * "lastChange" file plus "_" and the language is written with the text
	 * "closed". Once no registered language is open any more, the whole
	 * ticket is closed as well.
	 *
	 * @param language language whose part of the ticket is finished
	 * @throws IOException if a marker file cannot be written
	 */
	public void closeTicket(String language) throws IOException {
		String filename = lastChangeFile.getAbsolutePath()+"_"+language;
		FileWriter lastChangeFw = new FileWriter(new File(filename));
		try {
			lastChangeFw.write("closed");
		} finally {
			// Release the file handle even when writing fails.
			lastChangeFw.close();
		}
		ticketOpenLangs.put(language, false);

		// No language left open: close the ticket as a whole.
		if (!ticketOpenLangs.containsValue(Boolean.TRUE)){
			closeTicket();
		}
	}
	
	


	/**
	 * Picks a random, non-negative ticket number for which no ticket
	 * directory exists yet.
	 *
	 * The number is only checked, not reserved: the directory is created
	 * later by the constructor.
	 *
	 * @return the new ticket number as a decimal string
	 */
	public static String getNewTicket(){
		Random rd = new Random();
		String tnr;
		File ticketFile;
		do {
			// nextInt(bound) is never negative, unlike Math.abs(nextInt()),
			// which stays negative for Integer.MIN_VALUE.
			tnr = String.valueOf(rd.nextInt(Integer.MAX_VALUE));
			ticketFile = new File(TICKET_PATH+PATHSEPARATOR+tnr);
		} while (ticketFile.exists());

		return tnr;
	}

	/**
	 * Builds an XML description of a ticket: its number, the content of its
	 * "lastChange" marker, its query string and one {@code <lang>} element
	 * per sub-directory of the ticket directory (each sub-directory holds a
	 * language-specific result set).
	 *
	 * @param ticket identifier of the ticket
	 * @return the XML string, or {@code null} if the ticket directory, its
	 *         "lastChange" file or its "query" file does not exist
	 * @throws FileNotFoundException if a file disappears while being read
	 * @throws IOException if a file cannot be read
	 */
	public static String getXML(String ticket) throws FileNotFoundException, IOException {
		File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket);
		File lastChangeFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"lastChange");
		File queryFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+"query");
		if (!ticketFile.exists() || !lastChangeFile.exists() || !queryFile.exists()){
			return null;
		}

		// Used only to escape markup characters in text taken from outside.
		XMLOutputter escaper = new XMLOutputter();
		StringBuilder ret = new StringBuilder("<ticket><number>");
		ret.append(escaper.escapeElementEntities(ticket)).append("</number>");
		ret.append("<lastChanged>").append(readFileToString(lastChangeFile)).append("</lastChanged>");
		ret.append("<query>").append(escaper.escapeElementEntities(readFileToString(queryFile))).append("</query>");

		// listFiles() yields null when the directory cannot be listed.
		File[] entries = ticketFile.listFiles();
		if (entries != null){
			for (File file: entries){
				if(file.isDirectory()){
					ret.append("<lang>").append(escaper.escapeElementEntities(file.getName())).append("</lang>");
				}
			}
		}
		ret.append("</ticket>");
		return ret.toString();
	}
	

	

	
	

	/**
	 * Returns the complete result set of a ticket for one language as XML.
	 * The content of the language's "result" file is embedded unchanged
	 * inside {@code <results>}.
	 *
	 * The language-specific query string is read before the existence
	 * checks, so a missing query file raises an exception rather than
	 * yielding {@code null}.
	 *
	 * @param ticket identifier of the ticket
	 * @param lang language of the requested result set
	 * @return the XML string, or {@code null} if the ticket directory or the
	 *         "result" file of the language does not exist
	 * @throws FileNotFoundException if the query file of the language is missing
	 * @throws IOException if a file cannot be read
	 */
	public static String getResultXML(String ticket, String lang) throws FileNotFoundException, IOException {
		// Attribute values must be escaped: phrase queries contain double quotes.
		XMLOutputter escaper = new XMLOutputter();
		String queryString = escaper.escapeAttributeEntities(getQueryString(lang,ticket));

		File ticketFile = new File(TICKET_PATH+PATHSEPARATOR+ticket);
		if (!ticketFile.exists()){
			return null;
		}
		File resultFile = new File(ticketFile.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"result");
		if (!resultFile.exists()){
			return null;
		}

		StringBuilder ret = new StringBuilder();
		ret.append("<resultSet lang=\"").append(escaper.escapeAttributeEntities(lang));
		ret.append("\"  queryString=\"").append(queryString).append("\">");
		ret.append("<number>").append(escaper.escapeElementEntities(ticket)).append("</number>");
		ret.append("<results>").append(readFileToString(resultFile)).append("</results>");
		ret.append("</resultSet>");
		return ret.toString();
	}
	
	
	/**
	 * Returns the results of a ticket for one language, restricted to the
	 * records {@code from} to {@code to} (both inclusive). The first record
	 * has the number 0. Bounds outside the available range are clamped, so
	 * an empty {@code <results>} element is returned when nothing matches.
	 *
	 * @param ticket identifier of the ticket
	 * @param lang language of the requested result set
	 * @param from index of the first record to return
	 * @param to index of the last record to return
	 * @return the XML string, or {@code null} if the full result set is not
	 *         available for this ticket and language
	 * @throws FileNotFoundException if the query file of the language is missing
	 * @throws IOException if a file cannot be read
	 * @throws JDOMException if the stored result set is not well-formed XML
	 */
	public static String getResultXML(String ticket, String lang,
			Integer from, Integer to) throws FileNotFoundException, IOException, JDOMException {
		String ret = getResultXML(ticket, lang);
		if (ret == null){
			// No result set stored: report it the same way as the unpaged variant.
			return null;
		}

		XMLOutputter outputter = new XMLOutputter();
		StringBuilder retresult = new StringBuilder();
		retresult.append("<resultSet lang=\"").append(outputter.escapeAttributeEntities(lang));
		retresult.append("\"  queryString=\"").append(outputter.escapeAttributeEntities(getQueryString(lang,ticket))).append("\">");
		retresult.append("<number>").append(outputter.escapeElementEntities(ticket)).append("</number><results>");

		SAXBuilder builder = new SAXBuilder();
		InputStream stream = new ByteArrayInputStream(ret.getBytes("utf-8"));
		Document doc = builder.build(stream);
		XPath xpath = XPath.newInstance("//result");
		// JDOM 1.x returns a raw List; "//result" only selects elements.
		@SuppressWarnings("unchecked")
		List<Element> resultsElements = xpath.selectNodes(doc);

		// Clamp the requested window to the records that actually exist.
		int first = Math.max(from, 0);
		int last = Math.min(to, resultsElements.size()-1);
		for(int i = first; i <= last; i++){
			retresult.append(outputter.outputString(resultsElements.get(i)));
		}

		return retresult.append("</results></resultSet>").toString();
	}
	
	
	/**
	 * Returns the content of the "resultInfo" file stored for a ticket and
	 * language.
	 *
	 * @param ticket identifier of the ticket
	 * @param lang language of the requested result info
	 * @return the file content, or {@code null} if the ticket directory or
	 *         the "resultInfo" file does not exist
	 * @throws IOException if the file cannot be read
	 */
	public static String getResultInfoXML(String ticket, String lang) throws IOException {
		File ticketDir = new File(TICKET_PATH+PATHSEPARATOR+ticket);
		if (ticketDir.exists()){
			File infoFile = new File(ticketDir.getAbsoluteFile()+PATHSEPARATOR+lang+PATHSEPARATOR+"resultInfo");
			if (infoFile.exists()){
				return readFileToString(infoFile);
			}
		}
		return null;
	}
	
		
		
	
	/**
	 * Reads the query string stored at the top level of a ticket.
	 *
	 * @param ticket identifier of the ticket
	 * @return content of the ticket's "query" file
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the file cannot be read
	 */
	private  static String getQueryString(String ticket) throws FileNotFoundException, IOException {
		return readFileToString(new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+"query"));
	}
	
	/**
	 * Reads the query string stored for one language of a ticket.
	 *
	 * @param lang language whose query is requested
	 * @param ticket identifier of the ticket
	 * @return content of the "query" file in the language directory
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the file cannot be read
	 */
	public static String getQueryString(String lang,String ticket) throws FileNotFoundException, IOException {
		File languageQuery = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+"query");
		return readFileToString(languageQuery);
	}

	/**
	 * Lists the page files stored for one text of a ticket and language.
	 * The result contains the number of pages and one {@code <page>}
	 * element per file, each with an {@code xlink:href} of the form
	 * {@code tickets/<ticket>/<lang>/<textId>/<fileName>}.
	 *
	 * @param ticket identifier of the ticket
	 * @param lang language of the result set
	 * @param textId identifier of the text inside the result set
	 * @return the XML string, or {@code null} if no directory exists for
	 *         the text or it cannot be listed
	 */
	public static String getPagesWithHits(String ticket, String lang,
			String textId) {
		File textIdFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId);
		// List once so count and entries agree; null means the path is
		// missing, not a directory, or unreadable.
		File[] pageFiles = textIdFile.listFiles();
		if (pageFiles == null){
			return null;
		}

		StringBuilder ret = new StringBuilder("<xml xmlns:xlink=\"http://www.w3.org/1999/xlink\">");
		ret.append("<numberOfPages>").append(pageFiles.length).append("</numberOfPages><pages>");
		for (File file: pageFiles){
			ret.append("<page xlink:href=\"tickets/").append(ticket).append("/").append(lang).append("/").append(textId).append("/").append(file.getName()).append("\">");
			ret.append(file.getName()).append("</page>");
		}
		ret.append("</pages></xml>");
		return ret.toString();
	}
	
	/**
	 * Returns the hits stored for one page as XML. The page file is split
	 * at line feeds and every piece is wrapped, unchanged, in a
	 * {@code <line>} element.
	 *
	 * @param ticket identifier of the ticket
	 * @param lang language of the result set
	 * @param textId identifier of the text inside the result set
	 * @param restPath path of the page file relative to the text directory
	 * @return the XML string, or {@code null} if the page file does not exist
	 * @throws FileNotFoundException if the file disappears before it is read
	 * @throws IOException if the file cannot be read
	 */
	public static String getHitsOnPage(String ticket, String lang,
			String textId, String restPath) throws FileNotFoundException, IOException {
		File pageFile = new File(TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId+PATHSEPARATOR+restPath);
		if (!pageFile.exists()){
			return null;
		}

		StringBuilder xml = new StringBuilder("<xml>");
		for (String hitLine : readFileToString(pageFile).split("\n")){
			xml.append("<line>").append(hitLine).append("</line>");
		}
		return xml.append("</xml>").toString();
	}

	/**
	 * Reads a whole file and decodes it as utf-8.
	 *
	 * The buffer is sized from the file length at the time of the call;
	 * bytes appended afterwards are not returned.
	 *
	 * @param lastChangeFile file to read (any file, despite the name)
	 * @return the decoded file content
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if reading fails
	 */
	private static String readFileToString(File lastChangeFile)
			throws FileNotFoundException, IOException {
		byte[] buffer = new byte[(int) lastChangeFile.length()];
		InputStream in = new BufferedInputStream(new FileInputStream(lastChangeFile));
		int filled = 0;
		try {
			// A single read() may return fewer bytes than requested.
			while (filled < buffer.length){
				int count = in.read(buffer, filled, buffer.length - filled);
				if (count < 0){
					break; // file became shorter than its reported length
				}
				filled += count;
			}
		} finally {
			in.close();
		}

		return new String(buffer, 0, filled, "utf-8");
	}

	/**
	 * Tells whether a ticket has been closed, i.e. whether its "lastChange"
	 * marker contains exactly the text "closed".
	 *
	 * @param ticket identifier of the ticket
	 * @return {@code true} if the marker reads "closed"; {@code false} if
	 *         the ticket or its marker is missing, cannot be read, or has
	 *         any other content
	 */
	public static boolean isClosed(String ticket) {
		File ticketDir = new File(TICKET_PATH+PATHSEPARATOR+ticket);
		if (!ticketDir.exists()){
			return false;
		}
		File marker = new File(ticketDir.getAbsoluteFile()+PATHSEPARATOR+"lastChange");
		if (!marker.exists()){
			return false;
		}
		try {
			return "closed".equals(readFileToString(marker));
		} catch (IOException e) {
			// Also covers FileNotFoundException: an unreadable marker counts as not closed.
			return false;
		}
	}
	
	/**
	 * Tells whether a ticket directory exists for the given ticket.
	 *
	 * @param ticket identifier of the ticket
	 * @return {@code true} if the ticket's path exists
	 */
	public static boolean exists(String ticket) {
		return new File(TICKET_PATH+PATHSEPARATOR+ticket).exists();
	}
	/**
	 * Lists all entries of the ticket base directory as XML, one
	 * {@code <ticket><id>...</id></ticket>} element per entry.
	 *
	 * @return the XML string; the ticket list is empty when the base
	 *         directory does not exist or cannot be listed
	 */
	public static String getAllTickets() {
		File ticketFolder = new File(TICKET_PATH);
		// list() yields null when the folder is missing or unreadable.
		String[] tickets = ticketFolder.list();
		StringBuilder ret = new StringBuilder("<xml><tickets>");
		if (tickets != null){
			for (String ticket: tickets){
				ret.append("<ticket><id>").append(ticket).append("</id></ticket>");
			}
		}
		ret.append("</tickets></xml>");
		return ret.toString();
	}
	
	

	/**
	 * Command line entry point. The only supported call is
	 * {@code -clean minutes}, which deletes all tickets older than the
	 * given number of minutes.
	 *
	 * The usage text is printed and the process exits with status 1 when
	 * the arguments are missing, the option is unknown or the minutes value
	 * is not a number. A failed cleanup also ends with status 1.
	 *
	 * @param args command line arguments
	 */
	public static void main(String[] args){
		String usage = "Usage: TicketWriter -clean minutes, loescht alle tickets, die aelter als minutes sind.";

		if (args.length<2 || !args[0].equals("-clean")){
			System.out.println(usage);
			System.exit(1);
			return;
		}

		try {
			cleanTickets(args[1]);
		} catch (NumberFormatException e) {
			// The minutes argument is not a whole number.
			System.out.println(usage);
			System.exit(1);
		} catch (IOException e) {
			e.printStackTrace();
			System.exit(1);
		}
	}

	/**
	 * Deletes every ticket directory whose "lastChange" file was last
	 * modified more than the given number of minutes ago.
	 *
	 * A ticket directory without a "lastChange" file reports a modification
	 * time of 0 and is therefore always treated as expired. Entries of the
	 * base directory that are not directories are left untouched.
	 *
	 * @param minutes maximum age in minutes, as a decimal string
	 * @throws IOException if a ticket directory cannot be deleted
	 * @throws NumberFormatException if {@code minutes} is not a number
	 */
	private static void cleanTickets(String minutes) throws IOException {
		long maxAgeMillis = Long.parseLong(minutes)*60*1000;

		// listFiles() yields null when the base directory is missing or unreadable.
		File[] tickets = new File(TICKET_PATH).listFiles();
		if (tickets == null){
			return;
		}

		for (File ticket:tickets){
			if (!ticket.isDirectory()){
				// deleteDirectory() rejects plain files; skip them instead of aborting.
				continue;
			}
			File lmFile = new File(ticket.getAbsolutePath()+PATHSEPARATOR+"lastChange");
			long age = System.currentTimeMillis()-lmFile.lastModified();
			if (age > maxAgeMillis){
				FileUtils.deleteDirectory(ticket);
			}
		}
	}

	/**
	 * Returns the content of one page file stored for a text of a ticket
	 * and language.
	 *
	 * @param ticket identifier of the ticket
	 * @param lang language of the result set
	 * @param textId identifier of the text inside the result set
	 * @param pageFileName name of the page file inside the text directory
	 * @return the file content
	 * @throws FileNotFoundException if the file does not exist
	 * @throws IOException if the file cannot be read
	 */
	public static String getFileContent(String ticket, String lang, String textId,
			String pageFileName) throws FileNotFoundException, IOException  {
		String pagePath = TICKET_PATH+PATHSEPARATOR+ticket+PATHSEPARATOR+lang+PATHSEPARATOR+textId+PATHSEPARATOR+pageFileName;
		return readFileToString(new File(pagePath));
	}





}