Mercurial > hg > anteater
view src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.anteater.xml.impl; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import org.jdom2.Element; import de.mpiwg.anteater.persons.APerson; import de.mpiwg.anteater.persons.Location; import de.mpiwg.anteater.persons.Organization; import de.mpiwg.anteater.persons.Person; import de.mpiwg.anteater.xml.IPersonFinderParser; public class StanfordNERXMLParser extends JDOMParser implements IPersonFinderParser { public StanfordNERXMLParser(String content) { super(content, false); } public StanfordNERXMLParser(InputStream stream) { super(stream); } @Override public List<APerson> parsePersons() { List<Element> stanfordNode = executeXPath("/stanford_ner", null); List<APerson> persons = new ArrayList<APerson>(); if (stanfordNode == null || stanfordNode.isEmpty()) return persons; List<Element> children = stanfordNode.get(0).getChildren(); for (Element pNode : children) { APerson person; if (pNode.getName().equals("person")) person = new Person(); else if (pNode.getName().equals("organization")) person = new Organization(); else if (pNode.getName().equals("location")) person = new Location(); else continue; person.setReferenceInText(pNode.getText()); person.setStart(pNode.getAttributeValue("start") != null ? new Integer(pNode.getAttributeValue("start")) : -1); person.setLength(pNode.getAttributeValue("length") != null ? new Integer(pNode.getAttributeValue("length")) : -1); persons.add(person); } return persons; } }