Mercurial > hg > duomoOWLProject
view src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
alle has_readable_label Datatype properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | fb3f3df002df |
children |
line wrap: on
line source
package de.mpiwg.dwinter.duomo.lexdump;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;

/**
 * Parses an XML file into a JDOM document and offers XPath-based accessors
 * for its {@code carta} and {@code segna} elements and for the textual value
 * of arbitrary nodes.
 */
public class LexDumpImporter {

    /** The parsed XML document; built once in the constructor. */
    private final Document doc;

    /** Root log4j logger (currently not used by any method of this class). */
    private final Logger logger;

    /**
     * Parses the XML file at the given location.
     *
     * @param path file system path of the XML file to read
     * @throws JDOMException if the file cannot be parsed
     * @throws IOException if the file cannot be read
     */
    public LexDumpImporter(String path) throws JDOMException, IOException {
        SAXBuilder builder = new SAXBuilder();
        doc = builder.build(new File(path));
        logger = Logger.getRootLogger();
    }

    /**
     * Selects every {@code carta} element of the document.
     *
     * @return all nodes matching the XPath {@code //carta}
     * @throws JDOMException if the XPath expression cannot be evaluated
     */
    @SuppressWarnings("unchecked")
    public List<Element> getCartas() throws JDOMException {
        // JDOM 1.x returns a raw List; "//carta" only matches elements.
        return (List<Element>) XPath.selectNodes(doc, "//carta");
    }

    /**
     * Selects every {@code segna} element of the document.
     *
     * @return all nodes matching the XPath {@code //segna}
     * @throws JDOMException if the XPath expression cannot be evaluated
     */
    @SuppressWarnings("unchecked")
    public List<Element> getSignatures() throws JDOMException {
        // JDOM 1.x returns a raw List; "//segna" only matches elements.
        return (List<Element>) XPath.selectNodes(doc, "//segna");
    }

    /**
     * Evaluates an XPath expression relative to {@code context} and returns
     * the first match as a string.
     * <ul>
     * <li>Attribute: its value.</li>
     * <li>Element: the trimmed text of each direct text node and each direct
     * child element (via {@code getTextTrim()}), in document order, joined
     * by a single space. Fragments that are empty after trimming are
     * skipped, so indentation whitespace between child elements does not
     * produce leading, trailing or doubled spaces.</li>
     * <li>No match, or any other node type: the empty string.</li>
     * </ul>
     *
     * @param context node (or document) the XPath expression is evaluated against
     * @param path XPath expression selecting the node of interest
     * @return the textual value as described above; never {@code null}
     * @throws JDOMException if the XPath expression cannot be evaluated
     */
    public String getValue(Object context, String path) throws JDOMException {
        Object node = XPath.selectSingleNode(context, path);

        if (node instanceof Attribute) {
            return ((Attribute) node).getValue();
        }
        if (!(node instanceof Element)) {
            // Covers both "nothing selected" (null) and unsupported node types.
            return "";
        }

        List<String> fragments = new ArrayList<String>();
        for (Object child : ((Element) node).getContent()) {
            String text;
            if (child instanceof Element) {
                text = ((Element) child).getTextTrim();
            } else if (child instanceof Text) {
                text = ((Text) child).getTextTrim();
            } else {
                // Other content (e.g. comments) carries no value.
                continue;
            }
            // Whitespace-only text nodes and text-less children trim to "";
            // joining them would only inject stray separators.
            if (text.length() > 0) {
                fragments.add(text);
            }
        }
        return StringUtils.join(fragments.toArray(), ' ');
    }
}