diff src/de/mpiwg/anteater/ml/preprocessing/DataCreator.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/ml/preprocessing/DataCreator.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,78 @@
+package de.mpiwg.anteater.ml.preprocessing;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.StringWriter;
+
+import org.apache.commons.io.IOUtils;
+
+import de.mpiwg.anteater.AnteaterConfiguration;
+import de.mpiwg.anteater.ml.ITextParser;
+import de.mpiwg.anteater.text.TextInformation;
+
+public abstract class DataCreator {
+	public final static String COMPONENT_NAME = DataCreator.class.getSimpleName();
+	
+	public final static String UNKNOWN_CLASS_SYMBOL = "?";
+	
+	protected AnteaterConfiguration configuration;
+	private String filenamePrefix;
+	
+	public DataCreator(AnteaterConfiguration configuration, String filenamePrefix) {
+		this.configuration = configuration;
+		this.filenamePrefix = filenamePrefix;
+	}
+	
+	public abstract void createFileContents(TextInformation info, StringBuffer arffContents, ITextParser textParser);
+	
+
+	public String createARFFFile(TextInformation info, ITextParser textParser) {
+		configuration.getLogger().logMessage(COMPONENT_NAME, "Creating location ARFF-file for " + info.getFilepath());
+		
+		
+		File file = new File(info.getFilepath());
+		String filename = file.getName();
+		String fname = filenamePrefix + filename.substring(0, filename.lastIndexOf("."));
+		
+		File analysisFile = new File(configuration.getMlPath() + File.separator + fname + ".arff");
+		if (!analysisFile.exists()) {
+			try {
+				analysisFile.createNewFile();
+			} catch (IOException e) {
+				e.printStackTrace();
+				return null;
+			}
+		}
+		else
+			return analysisFile.getAbsolutePath();
+		
+		StringWriter writer = new StringWriter();
+		InputStream stream = getClass().getResourceAsStream("template.arff");
+		try {
+			IOUtils.copy(stream, writer);
+		} catch (IOException e) {
+			e.printStackTrace();
+			return null;
+		}
+		
+		StringBuffer arffContents = writer.getBuffer();
+		
+		
+		createFileContents(info, arffContents, textParser);
+		
+		
+		try {
+			FileWriter filewriter = new FileWriter(analysisFile);
+			filewriter.write(arffContents.toString());
+			filewriter.flush();
+			filewriter.close();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		
+		return analysisFile.getAbsolutePath();
+	}
+}