Mercurial > hg > duomoOWLProject
diff src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd
Neue Klassen zur Textanalyse (Stanford Parser eingebaut).
Alle has_readable_label Datatype Properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | fb3f3df002df |
children |
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java Wed Feb 09 16:36:36 2011 +0100 +++ b/src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java Thu Jun 21 17:08:22 2012 +0200 @@ -2,19 +2,25 @@ import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; +import org.jdom.Text; import org.jdom.input.SAXBuilder; import org.jdom.xpath.XPath; + public class LexDumpImporter { private Document doc; + private Logger logger; public LexDumpImporter(String path) throws JDOMException, IOException{ @@ -22,6 +28,7 @@ doc = builder.build(new File(path)); + logger = Logger.getRootLogger(); } @SuppressWarnings("unchecked") @@ -40,7 +47,18 @@ if (node==null){ return ""; } else if (Element.class.isInstance(node)){ - return ((Element)node).getTextTrim(); + List<String> retArray=new ArrayList<String>(); + for (Object o: ((Element)node).getContent()) + { + if(Element.class.isInstance(o)){ + retArray.add(((Element)o).getTextTrim()); + } else if(Text.class.isInstance(o)) { + retArray.add(((Text)o).getTextTrim()); + } + } + Object[] X = retArray.toArray(); + return StringUtils.join(X,' '); + //return ((Element)node).getTextTrim(); } else if (Attribute.class.isInstance(node)){ return ((Attribute)node).getValue(); }