Mercurial > hg > duomoOWLProject
annotate src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
alle has_readable_label Datatype properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | fb3f3df002df |
children |
rev | line source |
---|---|
0 | 1 package de.mpiwg.dwinter.duomo.lexdump; |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
5 import java.util.ArrayList; |
0 | 6 import java.util.List; |
7 | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
8 import org.apache.commons.lang.StringUtils; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
9 import org.apache.log4j.Logger; |
0 | 10 import org.jdom.Attribute; |
11 import org.jdom.Document; | |
12 import org.jdom.Element; | |
13 import org.jdom.JDOMException; | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
14 import org.jdom.Text; |
0 | 15 import org.jdom.input.SAXBuilder; |
16 import org.jdom.xpath.XPath; | |
17 | |
18 | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
19 |
0 | 20 public class LexDumpImporter { |
21 | |
22 private Document doc; | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
23 private Logger logger; |
0 | 24 |
25 public LexDumpImporter(String path) throws JDOMException, IOException{ | |
26 | |
27 SAXBuilder builder = new SAXBuilder(); | |
28 | |
29 doc = builder.build(new File(path)); | |
30 | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
31 logger = Logger.getRootLogger(); |
0 | 32 } |
33 | |
34 @SuppressWarnings("unchecked") | |
35 public List<Element> getCartas() throws JDOMException{ | |
36 return (List<Element>)XPath.selectNodes(doc, "//carta"); | |
37 } | |
38 | |
39 public List<Element> getSignatures() throws JDOMException { | |
40 return (List<Element>)XPath.selectNodes(doc, "//segna"); | |
41 } | |
42 | |
43 public String getValue(Object context, String path) throws JDOMException { | |
44 | |
45 Object node = XPath.selectSingleNode(context, path); | |
46 | |
47 if (node==null){ | |
48 return ""; | |
49 } else if (Element.class.isInstance(node)){ | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
50 List<String> retArray=new ArrayList<String>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
51 for (Object o: ((Element)node).getContent()) |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
52 { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
53 if(Element.class.isInstance(o)){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
54 retArray.add(((Element)o).getTextTrim()); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
55 } else if(Text.class.isInstance(o)) { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
56 retArray.add(((Text)o).getTextTrim()); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
57 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
58 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
59 Object[] X = retArray.toArray(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
60 return StringUtils.join(X,' '); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
0
diff
changeset
|
61 //return ((Element)node).getTextTrim(); |
0 | 62 } else if (Attribute.class.isInstance(node)){ |
63 return ((Attribute)node).getValue(); | |
64 } | |
65 | |
66 return ""; | |
67 } | |
68 } |