view src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line source

package de.mpiwg.anteater.xml.impl;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.jdom2.Element;

import de.mpiwg.anteater.persons.APerson;
import de.mpiwg.anteater.persons.Location;
import de.mpiwg.anteater.persons.Organization;
import de.mpiwg.anteater.persons.Person;
import de.mpiwg.anteater.xml.IPersonFinderParser;

/**
 * Parser that turns an XML document rooted at a <code>stanford_ner</code>
 * element into a list of {@link APerson} entities.
 * <p>
 * Each direct child of the root named <code>person</code>,
 * <code>organization</code> or <code>location</code> becomes a
 * {@link Person}, {@link Organization} or {@link Location} respectively.
 * Children with any other name are ignored.
 */
public class StanfordNERXMLParser extends JDOMParser implements IPersonFinderParser {

	/** Value stored for "start" / "length" when the attribute is missing or unusable. */
	private static final int UNKNOWN_POSITION = -1;

	/**
	 * Creates a parser for XML given as a string.
	 *
	 * @param content the XML document text
	 */
	public StanfordNERXMLParser(String content) {
		// NOTE(review): the meaning of the 'false' flag is defined by JDOMParser,
		// which is not visible here - confirm against that class.
		super(content, false);
	}

	/**
	 * Creates a parser for XML read from a stream.
	 *
	 * @param stream the stream supplying the XML document
	 */
	public StanfordNERXMLParser(InputStream stream) {
		super(stream);
	}

	/**
	 * Collects every recognised entity below the <code>/stanford_ner</code> root.
	 * <p>
	 * For each entity the element text is stored as the reference in the text,
	 * and the <code>start</code> and <code>length</code> attributes are stored
	 * as integers. A missing, empty or non-numeric attribute yields
	 * <code>-1</code>, so a single bad offset does not abort the whole parse.
	 *
	 * @return the entities in document order; an empty list (never
	 *         <code>null</code>) when the root element cannot be found
	 */
	@Override
	public List<APerson> parsePersons() {
		List<APerson> persons = new ArrayList<APerson>();

		// executeXPath is not declared in this class; presumably inherited from JDOMParser.
		List<Element> stanfordNode = executeXPath("/stanford_ner", null);
		if (stanfordNode == null || stanfordNode.isEmpty()) {
			return persons;
		}

		for (Element pNode : stanfordNode.get(0).getChildren()) {
			APerson person = createEntity(pNode.getName());
			if (person == null) {
				// Not one of the element names this parser understands.
				continue;
			}

			person.setReferenceInText(pNode.getText());
			person.setStart(parseIntAttribute(pNode, "start"));
			person.setLength(parseIntAttribute(pNode, "length"));

			persons.add(person);
		}

		return persons;
	}

	/**
	 * Maps an element name to a fresh entity object of the matching type.
	 *
	 * @param elementName the local name of the XML element
	 * @return a new entity, or <code>null</code> if the name is not recognised
	 */
	private static APerson createEntity(String elementName) {
		if ("person".equals(elementName)) {
			return new Person();
		}
		if ("organization".equals(elementName)) {
			return new Organization();
		}
		if ("location".equals(elementName)) {
			return new Location();
		}
		return null;
	}

	/**
	 * Reads an attribute as an integer.
	 *
	 * @param node the element carrying the attribute
	 * @param attributeName the attribute to read
	 * @return the parsed value, or <code>-1</code> when the attribute is
	 *         absent or does not hold a valid integer
	 */
	private static int parseIntAttribute(Element node, String attributeName) {
		String raw = node.getAttributeValue(attributeName);
		if (raw == null) {
			return UNKNOWN_POSITION;
		}
		try {
			// parseInt yields a primitive directly, avoiding the deprecated
			// Integer(String) constructor; trim tolerates padded values.
			return Integer.parseInt(raw.trim());
		} catch (NumberFormatException ignored) {
			// Deliberately treated like a missing attribute: the position is
			// unknown, but the entity itself is still worth reporting.
			return UNKNOWN_POSITION;
		}
	}

}