annotate src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut) alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents fb3f3df002df
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fb3f3df002df first release
dwinter
parents:
diff changeset
1 package de.mpiwg.dwinter.duomo.lexdump;
fb3f3df002df first release
dwinter
parents:
diff changeset
2
fb3f3df002df first release
dwinter
parents:
diff changeset
3 import java.io.File;
fb3f3df002df first release
dwinter
parents:
diff changeset
4 import java.io.IOException;
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
5 import java.util.ArrayList;
0
fb3f3df002df first release
dwinter
parents:
diff changeset
6 import java.util.List;
fb3f3df002df first release
dwinter
parents:
diff changeset
7
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
8 import org.apache.commons.lang.StringUtils;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
9 import org.apache.log4j.Logger;
0
fb3f3df002df first release
dwinter
parents:
diff changeset
10 import org.jdom.Attribute;
fb3f3df002df first release
dwinter
parents:
diff changeset
11 import org.jdom.Document;
fb3f3df002df first release
dwinter
parents:
diff changeset
12 import org.jdom.Element;
fb3f3df002df first release
dwinter
parents:
diff changeset
13 import org.jdom.JDOMException;
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
14 import org.jdom.Text;
0
fb3f3df002df first release
dwinter
parents:
diff changeset
15 import org.jdom.input.SAXBuilder;
fb3f3df002df first release
dwinter
parents:
diff changeset
16 import org.jdom.xpath.XPath;
fb3f3df002df first release
dwinter
parents:
diff changeset
17
fb3f3df002df first release
dwinter
parents:
diff changeset
18
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
19
0
fb3f3df002df first release
dwinter
parents:
diff changeset
20 public class LexDumpImporter {
fb3f3df002df first release
dwinter
parents:
diff changeset
21
fb3f3df002df first release
dwinter
parents:
diff changeset
22 private Document doc;
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
23 private Logger logger;
0
fb3f3df002df first release
dwinter
parents:
diff changeset
24
fb3f3df002df first release
dwinter
parents:
diff changeset
25 public LexDumpImporter(String path) throws JDOMException, IOException{
fb3f3df002df first release
dwinter
parents:
diff changeset
26
fb3f3df002df first release
dwinter
parents:
diff changeset
27 SAXBuilder builder = new SAXBuilder();
fb3f3df002df first release
dwinter
parents:
diff changeset
28
fb3f3df002df first release
dwinter
parents:
diff changeset
29 doc = builder.build(new File(path));
fb3f3df002df first release
dwinter
parents:
diff changeset
30
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
31 logger = Logger.getRootLogger();
0
fb3f3df002df first release
dwinter
parents:
diff changeset
32 }
fb3f3df002df first release
dwinter
parents:
diff changeset
33
fb3f3df002df first release
dwinter
parents:
diff changeset
34 @SuppressWarnings("unchecked")
fb3f3df002df first release
dwinter
parents:
diff changeset
35 public List<Element> getCartas() throws JDOMException{
fb3f3df002df first release
dwinter
parents:
diff changeset
36 return (List<Element>)XPath.selectNodes(doc, "//carta");
fb3f3df002df first release
dwinter
parents:
diff changeset
37 }
fb3f3df002df first release
dwinter
parents:
diff changeset
38
fb3f3df002df first release
dwinter
parents:
diff changeset
39 public List<Element> getSignatures() throws JDOMException {
fb3f3df002df first release
dwinter
parents:
diff changeset
40 return (List<Element>)XPath.selectNodes(doc, "//segna");
fb3f3df002df first release
dwinter
parents:
diff changeset
41 }
fb3f3df002df first release
dwinter
parents:
diff changeset
42
fb3f3df002df first release
dwinter
parents:
diff changeset
43 public String getValue(Object context, String path) throws JDOMException {
fb3f3df002df first release
dwinter
parents:
diff changeset
44
fb3f3df002df first release
dwinter
parents:
diff changeset
45 Object node = XPath.selectSingleNode(context, path);
fb3f3df002df first release
dwinter
parents:
diff changeset
46
fb3f3df002df first release
dwinter
parents:
diff changeset
47 if (node==null){
fb3f3df002df first release
dwinter
parents:
diff changeset
48 return "";
fb3f3df002df first release
dwinter
parents:
diff changeset
49 } else if (Element.class.isInstance(node)){
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
50 List<String> retArray=new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
51 for (Object o: ((Element)node).getContent())
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
52 {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
53 if(Element.class.isInstance(o)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
54 retArray.add(((Element)o).getTextTrim());
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
55 } else if(Text.class.isInstance(o)) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
56 retArray.add(((Text)o).getTextTrim());
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
57 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
58 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
59 Object[] X = retArray.toArray();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
60 return StringUtils.join(X,' ');
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents: 0
diff changeset
61 //return ((Element)node).getTextTrim();
0
fb3f3df002df first release
dwinter
parents:
diff changeset
62 } else if (Attribute.class.isInstance(node)){
fb3f3df002df first release
dwinter
parents:
diff changeset
63 return ((Attribute)node).getValue();
fb3f3df002df first release
dwinter
parents:
diff changeset
64 }
fb3f3df002df first release
dwinter
parents:
diff changeset
65
fb3f3df002df first release
dwinter
parents:
diff changeset
66 return "";
fb3f3df002df first release
dwinter
parents:
diff changeset
67 }
fb3f3df002df first release
dwinter
parents:
diff changeset
68 }