Mercurial > hg > anteater
diff src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
// Reconstructed from the flattened diff that adds
// b/src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java (previously /dev/null).
package de.mpiwg.anteater.xml.impl;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.jdom2.Element;

import de.mpiwg.anteater.persons.APerson;
import de.mpiwg.anteater.persons.Location;
import de.mpiwg.anteater.persons.Organization;
import de.mpiwg.anteater.persons.Person;
import de.mpiwg.anteater.xml.IPersonFinderParser;

/**
 * Reads the direct children of the {@code /stanford_ner} element and turns each
 * {@code person}, {@code organization} or {@code location} child into the matching
 * {@link APerson} subtype.
 */
public class StanfordNERXMLParser extends JDOMParser implements IPersonFinderParser {

    /** Value stored for {@code start} / {@code length} when the attribute is absent. */
    private static final int MISSING_ATTRIBUTE = -1;

    /**
     * Creates a parser over XML supplied as a string.
     *
     * @param content the XML text handed to the superclass
     */
    public StanfordNERXMLParser(String content) {
        // NOTE(review): the meaning of the 'false' flag is defined by JDOMParser,
        // which is not visible here - confirm against that class.
        super(content, false);
    }

    /**
     * Creates a parser over XML supplied as a stream.
     *
     * @param stream the XML input handed to the superclass
     */
    public StanfordNERXMLParser(InputStream stream) {
        super(stream);
    }

    /**
     * Collects every recognised entity found directly below {@code /stanford_ner}.
     *
     * <p>Children with any other element name are skipped. For each recognised child the
     * element text becomes the reference in text, and the {@code start} and {@code length}
     * attributes are parsed as integers, defaulting to {@code -1} when absent.
     *
     * @return the entities in child order; an empty list when the XPath query yields
     *         {@code null} or no nodes
     * @throws NumberFormatException if a present {@code start} or {@code length} attribute
     *         is not a valid integer
     */
    @Override
    public List<APerson> parsePersons() {
        List<Element> stanfordNode = executeXPath("/stanford_ner", null);
        List<APerson> persons = new ArrayList<APerson>();

        if (stanfordNode == null || stanfordNode.isEmpty()) {
            return persons;
        }

        // Only the first matching node is inspected, as in the original implementation.
        List<Element> children = stanfordNode.get(0).getChildren();

        for (Element pNode : children) {
            String tagName = pNode.getName();
            APerson person;
            if ("person".equals(tagName)) {
                person = new Person();
            } else if ("organization".equals(tagName)) {
                person = new Organization();
            } else if ("location".equals(tagName)) {
                person = new Location();
            } else {
                continue;
            }

            person.setReferenceInText(pNode.getText());
            person.setStart(parseIntAttribute(pNode, "start"));
            person.setLength(parseIntAttribute(pNode, "length"));

            persons.add(person);
        }

        return persons;
    }

    /**
     * Parses an integer attribute of {@code node}, returning {@code -1} when it is absent.
     *
     * <p>Uses {@link Integer#parseInt(String)} instead of the deprecated
     * {@code new Integer(String)}; both reject malformed input with
     * {@link NumberFormatException}.
     */
    private static int parseIntAttribute(Element node, String attributeName) {
        String raw = node.getAttributeValue(attributeName);
        return raw != null ? Integer.parseInt(raw) : MISSING_ATTRIBUTE;
    }

}