Mercurial > hg > anteater
diff src/de/mpiwg/anteater/ml/preprocessing/DataCreator.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/de/mpiwg/anteater/ml/preprocessing/DataCreator.java Fri Sep 14 10:30:43 2012 +0200 @@ -0,0 +1,78 @@ +package de.mpiwg.anteater.ml.preprocessing; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; + +import org.apache.commons.io.IOUtils; + +import de.mpiwg.anteater.AnteaterConfiguration; +import de.mpiwg.anteater.ml.ITextParser; +import de.mpiwg.anteater.text.TextInformation; + +public abstract class DataCreator { + public final static String COMPONENT_NAME = DataCreator.class.getSimpleName(); + + public final static String UNKNOWN_CLASS_SYMBOL = "?"; + + protected AnteaterConfiguration configuration; + private String filenamePrefix; + + public DataCreator(AnteaterConfiguration configuration, String filenamePrefix) { + this.configuration = configuration; + this.filenamePrefix = filenamePrefix; + } + + public abstract void createFileContents(TextInformation info, StringBuffer arffContents, ITextParser textParser); + + + public String createARFFFile(TextInformation info, ITextParser textParser) { + configuration.getLogger().logMessage(COMPONENT_NAME, "Creating location ARFF-file for " + info.getFilepath()); + + + File file = new File(info.getFilepath()); + String filename = file.getName(); + String fname = filenamePrefix + filename.substring(0, filename.lastIndexOf(".")); + + File analysisFile = new File(configuration.getMlPath() + File.separator + fname + ".arff"); + if (!analysisFile.exists()) { + try { + analysisFile.createNewFile(); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + else + return analysisFile.getAbsolutePath(); + + StringWriter writer = new StringWriter(); + InputStream stream = getClass().getResourceAsStream("template.arff"); + try { + IOUtils.copy(stream, writer); + } catch (IOException e) { + e.printStackTrace(); + return null; + } + + StringBuffer arffContents = writer.getBuffer(); + + + createFileContents(info, arffContents, textParser); + + + try { + FileWriter filewriter = new FileWriter(analysisFile); + filewriter.write(arffContents.toString()); + filewriter.flush(); + filewriter.close(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + return analysisFile.getAbsolutePath(); + } +}