Mercurial > hg > duomoOWLProject
comparison src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd
Neue Klassen zur Textanalyse (Stanford-Parser eingebaut)
Alle has_readable_label Datatype-Properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | fb3f3df002df |
children |
comparison
equal
deleted
inserted
replaced
7:19e40abb3e8a | 8:919e9f3b5efd |
---|---|
1 package de.mpiwg.dwinter.duomo.lexdump; | 1 package de.mpiwg.dwinter.duomo.lexdump; |
2 | 2 |
3 import java.io.File; | 3 import java.io.File; |
4 import java.io.IOException; | 4 import java.io.IOException; |
5 import java.util.ArrayList; | |
5 import java.util.List; | 6 import java.util.List; |
6 | 7 |
8 import org.apache.commons.lang.StringUtils; | |
9 import org.apache.log4j.Logger; | |
7 import org.jdom.Attribute; | 10 import org.jdom.Attribute; |
8 import org.jdom.Document; | 11 import org.jdom.Document; |
9 import org.jdom.Element; | 12 import org.jdom.Element; |
10 import org.jdom.JDOMException; | 13 import org.jdom.JDOMException; |
14 import org.jdom.Text; | |
11 import org.jdom.input.SAXBuilder; | 15 import org.jdom.input.SAXBuilder; |
12 import org.jdom.xpath.XPath; | 16 import org.jdom.xpath.XPath; |
17 | |
13 | 18 |
14 | 19 |
15 public class LexDumpImporter { | 20 public class LexDumpImporter { |
16 | 21 |
17 private Document doc; | 22 private Document doc; |
23 private Logger logger; | |
18 | 24 |
19 public LexDumpImporter(String path) throws JDOMException, IOException{ | 25 public LexDumpImporter(String path) throws JDOMException, IOException{ |
20 | 26 |
21 SAXBuilder builder = new SAXBuilder(); | 27 SAXBuilder builder = new SAXBuilder(); |
22 | 28 |
23 doc = builder.build(new File(path)); | 29 doc = builder.build(new File(path)); |
24 | 30 |
31 logger = Logger.getRootLogger(); | |
25 } | 32 } |
26 | 33 |
27 @SuppressWarnings("unchecked") | 34 @SuppressWarnings("unchecked") |
28 public List<Element> getCartas() throws JDOMException{ | 35 public List<Element> getCartas() throws JDOMException{ |
29 return (List<Element>)XPath.selectNodes(doc, "//carta"); | 36 return (List<Element>)XPath.selectNodes(doc, "//carta"); |
38 Object node = XPath.selectSingleNode(context, path); | 45 Object node = XPath.selectSingleNode(context, path); |
39 | 46 |
40 if (node==null){ | 47 if (node==null){ |
41 return ""; | 48 return ""; |
42 } else if (Element.class.isInstance(node)){ | 49 } else if (Element.class.isInstance(node)){ |
43 return ((Element)node).getTextTrim(); | 50 List<String> retArray=new ArrayList<String>(); |
51 for (Object o: ((Element)node).getContent()) | |
52 { | |
53 if(Element.class.isInstance(o)){ | |
54 retArray.add(((Element)o).getTextTrim()); | |
55 } else if(Text.class.isInstance(o)) { | |
56 retArray.add(((Text)o).getTextTrim()); | |
57 } | |
58 } | |
59 Object[] X = retArray.toArray(); | |
60 return StringUtils.join(X,' '); | |
61 //return ((Element)node).getTextTrim(); | |
44 } else if (Attribute.class.isInstance(node)){ | 62 } else if (Attribute.class.isInstance(node)){ |
45 return ((Attribute)node).getValue(); | 63 return ((Attribute)node).getValue(); |
46 } | 64 } |
47 | 65 |
48 return ""; | 66 return ""; |