0
|
1 package de.mpiwg.anteater.ml.preprocessing;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.FileWriter;
|
|
5 import java.io.IOException;
|
|
6 import java.io.InputStream;
|
|
7 import java.io.StringWriter;
|
|
8
|
|
9 import org.apache.commons.io.IOUtils;
|
|
10
|
|
11 import de.mpiwg.anteater.AnteaterConfiguration;
|
|
12 import de.mpiwg.anteater.ml.ITextParser;
|
|
13 import de.mpiwg.anteater.text.TextInformation;
|
|
14
|
|
15 public abstract class DataCreator {
|
|
16 public final static String COMPONENT_NAME = DataCreator.class.getSimpleName();
|
|
17
|
|
18 public final static String UNKNOWN_CLASS_SYMBOL = "?";
|
|
19
|
|
20 protected AnteaterConfiguration configuration;
|
|
21 private String filenamePrefix;
|
|
22
|
|
23 public DataCreator(AnteaterConfiguration configuration, String filenamePrefix) {
|
|
24 this.configuration = configuration;
|
|
25 this.filenamePrefix = filenamePrefix;
|
|
26 }
|
|
27
|
|
28 public abstract void createFileContents(TextInformation info, StringBuffer arffContents, ITextParser textParser);
|
|
29
|
|
30
|
|
31 public String createARFFFile(TextInformation info, ITextParser textParser) {
|
|
32 configuration.getLogger().logMessage(COMPONENT_NAME, "Creating location ARFF-file for " + info.getFilepath());
|
|
33
|
|
34
|
|
35 File file = new File(info.getFilepath());
|
|
36 String filename = file.getName();
|
|
37 String fname = filenamePrefix + filename.substring(0, filename.lastIndexOf("."));
|
|
38
|
|
39 File analysisFile = new File(configuration.getMlPath() + File.separator + fname + ".arff");
|
|
40 if (!analysisFile.exists()) {
|
|
41 try {
|
|
42 analysisFile.createNewFile();
|
|
43 } catch (IOException e) {
|
|
44 e.printStackTrace();
|
|
45 return null;
|
|
46 }
|
|
47 }
|
|
48 else
|
|
49 return analysisFile.getAbsolutePath();
|
|
50
|
|
51 StringWriter writer = new StringWriter();
|
|
52 InputStream stream = getClass().getResourceAsStream("template.arff");
|
|
53 try {
|
|
54 IOUtils.copy(stream, writer);
|
|
55 } catch (IOException e) {
|
|
56 e.printStackTrace();
|
|
57 return null;
|
|
58 }
|
|
59
|
|
60 StringBuffer arffContents = writer.getBuffer();
|
|
61
|
|
62
|
|
63 createFileContents(info, arffContents, textParser);
|
|
64
|
|
65
|
|
66 try {
|
|
67 FileWriter filewriter = new FileWriter(analysisFile);
|
|
68 filewriter.write(arffContents.toString());
|
|
69 filewriter.flush();
|
|
70 filewriter.close();
|
|
71 } catch (IOException e) {
|
|
72 // TODO Auto-generated catch block
|
|
73 e.printStackTrace();
|
|
74 }
|
|
75
|
|
76 return analysisFile.getAbsolutePath();
|
|
77 }
|
|
78 }
|