comparison src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd

Neue Klassen zur Textanalyse (Stanford Parser eingebaut); alle has_readable_label Datatype Properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents fb3f3df002df
children
comparison
equal deleted inserted replaced
7:19e40abb3e8a 8:919e9f3b5efd
1 package de.mpiwg.dwinter.duomo.lexdump; 1 package de.mpiwg.dwinter.duomo.lexdump;
2 2
3 import java.io.File; 3 import java.io.File;
4 import java.io.IOException; 4 import java.io.IOException;
5 import java.util.ArrayList;
5 import java.util.List; 6 import java.util.List;
6 7
8 import org.apache.commons.lang.StringUtils;
9 import org.apache.log4j.Logger;
7 import org.jdom.Attribute; 10 import org.jdom.Attribute;
8 import org.jdom.Document; 11 import org.jdom.Document;
9 import org.jdom.Element; 12 import org.jdom.Element;
10 import org.jdom.JDOMException; 13 import org.jdom.JDOMException;
14 import org.jdom.Text;
11 import org.jdom.input.SAXBuilder; 15 import org.jdom.input.SAXBuilder;
12 import org.jdom.xpath.XPath; 16 import org.jdom.xpath.XPath;
17
13 18
14 19
15 public class LexDumpImporter { 20 public class LexDumpImporter {
16 21
17 private Document doc; 22 private Document doc;
23 private Logger logger;
18 24
19 public LexDumpImporter(String path) throws JDOMException, IOException{ 25 public LexDumpImporter(String path) throws JDOMException, IOException{
20 26
21 SAXBuilder builder = new SAXBuilder(); 27 SAXBuilder builder = new SAXBuilder();
22 28
23 doc = builder.build(new File(path)); 29 doc = builder.build(new File(path));
24 30
31 logger = Logger.getRootLogger();
25 } 32 }
26 33
27 @SuppressWarnings("unchecked") 34 @SuppressWarnings("unchecked")
28 public List<Element> getCartas() throws JDOMException{ 35 public List<Element> getCartas() throws JDOMException{
29 return (List<Element>)XPath.selectNodes(doc, "//carta"); 36 return (List<Element>)XPath.selectNodes(doc, "//carta");
38 Object node = XPath.selectSingleNode(context, path); 45 Object node = XPath.selectSingleNode(context, path);
39 46
40 if (node==null){ 47 if (node==null){
41 return ""; 48 return "";
42 } else if (Element.class.isInstance(node)){ 49 } else if (Element.class.isInstance(node)){
43 return ((Element)node).getTextTrim(); 50 List<String> retArray=new ArrayList<String>();
51 for (Object o: ((Element)node).getContent())
52 {
53 if(Element.class.isInstance(o)){
54 retArray.add(((Element)o).getTextTrim());
55 } else if(Text.class.isInstance(o)) {
56 retArray.add(((Text)o).getTextTrim());
57 }
58 }
59 Object[] X = retArray.toArray();
60 return StringUtils.join(X,' ');
61 //return ((Element)node).getTextTrim();
44 } else if (Attribute.class.isInstance(node)){ 62 } else if (Attribute.class.isInstance(node)){
45 return ((Attribute)node).getValue(); 63 return ((Attribute)node).getValue();
46 } 64 }
47 65
48 return ""; 66 return "";