view src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut) alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents fb3f3df002df
children
line wrap: on
line source

package de.mpiwg.dwinter.duomo.lexdump;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Text;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;



/**
 * Read-only accessor for an XML dump file.
 *
 * <p>The file is parsed once with JDOM in the constructor; all other methods
 * evaluate XPath expressions against the parsed document or against a node
 * taken from it.</p>
 */
public class LexDumpImporter {

	/** Parsed XML document that the document-wide queries run against. */
	private final Document doc;

	/** log4j root logger, obtained in the constructor; not used by the methods of this class. */
	private final Logger logger;

	/**
	 * Parses the XML file at the given location.
	 *
	 * @param path file system path of the XML file to load
	 * @throws JDOMException if JDOM reports an error while parsing the file
	 * @throws IOException if the file cannot be read
	 */
	public LexDumpImporter(String path) throws JDOMException, IOException{
		SAXBuilder builder = new SAXBuilder();
		doc = builder.build(new File(path));
		logger = Logger.getRootLogger();
	}

	/**
	 * Selects every {@code carta} element in the document, at any depth.
	 *
	 * @return the nodes matched by the XPath expression {@code //carta}
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	@SuppressWarnings("unchecked")
	public List<Element> getCartas() throws JDOMException{
		// JDOM 1.x returns a raw List; "//carta" is an element name test.
		return (List<Element>)XPath.selectNodes(doc, "//carta");
	}

	/**
	 * Selects every {@code segna} element in the document, at any depth.
	 *
	 * @return the nodes matched by the XPath expression {@code //segna}
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	@SuppressWarnings("unchecked")
	public List<Element> getSignatures() throws JDOMException {
		// JDOM 1.x returns a raw List; "//segna" is an element name test.
		return (List<Element>)XPath.selectNodes(doc, "//segna");
	}

	/**
	 * Evaluates an XPath expression relative to {@code context} and returns
	 * the first match as a string.
	 *
	 * <ul>
	 * <li>Element: the trimmed text of each text node and of each child
	 * element, in document order, joined with a single space. Fragments that
	 * are empty after trimming (for example the indentation between child
	 * elements) are left out, so the result has no leading, trailing or
	 * doubled separator spaces.</li>
	 * <li>Attribute: the attribute value as returned by JDOM (not trimmed).</li>
	 * <li>No match, or any other kind of result: the empty string.</li>
	 * </ul>
	 *
	 * @param context node the expression is evaluated against
	 * @param path XPath expression
	 * @return the extracted text, never {@code null}
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	public String getValue(Object context, String path) throws JDOMException {
		Object node = XPath.selectSingleNode(context, path);

		// instanceof is false for null, so a missing node falls through to "".
		if (node instanceof Element) {
			List<String> fragments = new ArrayList<String>();
			for (Object child : ((Element) node).getContent()) {
				String fragment = null;
				if (child instanceof Element) {
					fragment = ((Element) child).getTextTrim();
				} else if (child instanceof Text) {
					fragment = ((Text) child).getTextTrim();
				}
				// Other content (comments, processing instructions, ...) is ignored,
				// as are fragments that contain nothing but whitespace.
				if (fragment != null && fragment.length() > 0) {
					fragments.add(fragment);
				}
			}
			return StringUtils.join(fragments.toArray(), ' ');
		}

		if (node instanceof Attribute) {
			return ((Attribute) node).getValue();
		}

		return "";
	}
}