view src/de/mpiwg/anteater/ml/preprocessing/DataCreator.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line source

package de.mpiwg.anteater.ml.preprocessing;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;

import org.apache.commons.io.IOUtils;

import de.mpiwg.anteater.AnteaterConfiguration;
import de.mpiwg.anteater.ml.ITextParser;
import de.mpiwg.anteater.text.TextInformation;

/**
 * Base class for components that write an ARFF file for a text.
 * <p>
 * The file starts from the classpath resource <code>template.arff</code>;
 * subclasses append their own content to it in
 * {@link #createFileContents(TextInformation, StringBuffer, ITextParser)}.
 */
public abstract class DataCreator {
	public final static String COMPONENT_NAME = DataCreator.class.getSimpleName();
	
	public final static String UNKNOWN_CLASS_SYMBOL = "?";
	
	/** Name of the template resource every generated ARFF file starts from. */
	private final static String TEMPLATE_RESOURCE = "template.arff";
	
	protected AnteaterConfiguration configuration;
	private final String filenamePrefix;
	
	/**
	 * @param configuration  supplies the logger and the output directory
	 *                       ({@code getMlPath()}) used by this creator
	 * @param filenamePrefix string prepended to the name of every generated file
	 */
	public DataCreator(AnteaterConfiguration configuration, String filenamePrefix) {
		this.configuration = configuration;
		this.filenamePrefix = filenamePrefix;
	}
	
	/**
	 * Adds the subclass-specific content to the ARFF file being built.
	 *
	 * @param info         the text the file is created for
	 * @param arffContents buffer pre-filled with the template; implementations
	 *                     modify it in place
	 * @param textParser   parser handed through from
	 *                     {@link #createARFFFile(TextInformation, ITextParser)}
	 */
	public abstract void createFileContents(TextInformation info, StringBuffer arffContents, ITextParser textParser);
	

	/**
	 * Creates the ARFF file for the given text unless it already exists.
	 * <p>
	 * The target is <code>&lt;mlPath&gt;/&lt;prefix&gt;&lt;name without extension&gt;.arff</code>.
	 * An existing file is never regenerated; its path is returned as is.
	 * The file is only created once its complete content is available, and a
	 * partially written file is removed again, so a failed run cannot leave an
	 * empty file behind that later calls would mistake for a finished one.
	 *
	 * @param info       the text to create the file for
	 * @param textParser parser passed on to
	 *                   {@link #createFileContents(TextInformation, StringBuffer, ITextParser)}
	 * @return the absolute path of the ARFF file, or <code>null</code> if the
	 *         template could not be read or the file could not be written
	 */
	public String createARFFFile(TextInformation info, ITextParser textParser) {
		configuration.getLogger().logMessage(COMPONENT_NAME, "Creating location ARFF-file for " + info.getFilepath());
		
		File analysisFile = new File(configuration.getMlPath() + File.separator
				+ buildBaseName(info.getFilepath()) + ".arff");
		if (analysisFile.exists()) {
			return analysisFile.getAbsolutePath();
		}
		
		StringBuffer arffContents = loadTemplate();
		if (arffContents == null) {
			return null;
		}
		
		createFileContents(info, arffContents, textParser);
		
		if (!writeFile(analysisFile, arffContents.toString())) {
			return null;
		}
		return analysisFile.getAbsolutePath();
	}
	
	/** Returns the prefix plus the file name of the given path with its last extension removed. */
	private String buildBaseName(String filepath) {
		String filename = new File(filepath).getName();
		int extensionStart = filename.lastIndexOf('.');
		// lastIndexOf yields -1 for names without a dot; substring(0, -1) would throw.
		String stem = extensionStart < 0 ? filename : filename.substring(0, extensionStart);
		return filenamePrefix + stem;
	}
	
	/** Reads the template resource into a buffer; returns null if it is missing or unreadable. */
	private StringBuffer loadTemplate() {
		// Looked up via getClass(), i.e. relative to the concrete subclass.
		InputStream stream = getClass().getResourceAsStream(TEMPLATE_RESOURCE);
		if (stream == null) {
			configuration.getLogger().logMessage(COMPONENT_NAME,
					"Resource " + TEMPLATE_RESOURCE + " not found for " + getClass().getName());
			return null;
		}
		
		StringWriter writer = new StringWriter();
		try {
			IOUtils.copy(stream, writer);
			return writer.getBuffer();
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		} finally {
			try {
				stream.close();
			} catch (IOException ignored) {
				// The content has been read (or reading already failed); nothing to recover.
			}
		}
	}
	
	/** Writes the contents to the target file; removes the file and returns false on failure. */
	private boolean writeFile(File target, String contents) {
		FileWriter filewriter = null;
		boolean written = false;
		try {
			filewriter = new FileWriter(target);
			filewriter.write(contents);
			filewriter.flush();
			written = true;
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			if (filewriter != null) {
				try {
					filewriter.close();
				} catch (IOException e) {
					e.printStackTrace();
					written = false;
				}
			}
		}
		
		if (!written) {
			configuration.getLogger().logMessage(COMPONENT_NAME,
					"Could not write ARFF-file " + target.getAbsolutePath());
			// The file did not exist before this call, so any remnant is ours to remove.
			target.delete();
		}
		return written;
	}
}