diff src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,57 @@
+package de.mpiwg.anteater.xml.impl;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jdom2.Element;
+
+import de.mpiwg.anteater.persons.APerson;
+import de.mpiwg.anteater.persons.Location;
+import de.mpiwg.anteater.persons.Organization;
+import de.mpiwg.anteater.persons.Person;
+import de.mpiwg.anteater.xml.IPersonFinderParser;
+
+/**
+ * Reads an XML document whose root element is <code>stanford_ner</code> and
+ * converts its direct child elements into {@link APerson} objects.
+ * <p>
+ * Child elements named <code>person</code>, <code>organization</code> and
+ * <code>location</code> are mapped to {@link Person}, {@link Organization} and
+ * {@link Location} respectively; children with any other name are skipped.
+ */
+public class StanfordNERXMLParser extends JDOMParser implements IPersonFinderParser {
+
+	/** Value stored for <code>start</code>/<code>length</code> when the attribute is missing or not an integer. */
+	private static final int UNKNOWN_OFFSET = -1;
+
+	/**
+	 * Creates a parser for XML supplied as a string.
+	 *
+	 * @param content
+	 *            the XML text; handed to the {@link JDOMParser} constructor
+	 *            together with <code>false</code> as its second argument (the
+	 *            meaning of that flag is defined by {@link JDOMParser})
+	 */
+	public StanfordNERXMLParser(String content) {
+		super(content, false);
+	}
+
+	/**
+	 * Creates a parser for XML supplied as a stream.
+	 *
+	 * @param stream
+	 *            the stream handed to the {@link JDOMParser} constructor
+	 */
+	public StanfordNERXMLParser(InputStream stream) {
+		super(stream);
+	}
+
+	/**
+	 * Collects all recognized entities below the <code>/stanford_ner</code>
+	 * root element.
+	 * <p>
+	 * For every supported child element the element text becomes the
+	 * reference in the text, and the <code>start</code> and
+	 * <code>length</code> attributes are stored as integers. A missing or
+	 * non-numeric attribute is stored as <code>-1</code> so that one bad
+	 * attribute does not abort the whole parse.
+	 *
+	 * @return the entities in document order; an empty list (never
+	 *         <code>null</code>) if the root element cannot be found
+	 */
+	@Override
+	public List<APerson> parsePersons() {
+		List<Element> roots = executeXPath("/stanford_ner", null);
+		List<APerson> persons = new ArrayList<APerson>();
+
+		if (roots == null || roots.isEmpty()) {
+			return persons;
+		}
+
+		// Only the first matching root element is evaluated.
+		for (Element node : roots.get(0).getChildren()) {
+			APerson entity = createEntity(node.getName());
+			if (entity == null) {
+				// Not one of the supported entity elements.
+				continue;
+			}
+
+			entity.setReferenceInText(node.getText());
+			entity.setStart(parseOffset(node.getAttributeValue("start")));
+			entity.setLength(parseOffset(node.getAttributeValue("length")));
+
+			persons.add(entity);
+		}
+
+		return persons;
+	}
+
+	/**
+	 * Instantiates the entity type that belongs to an element name.
+	 *
+	 * @param elementName
+	 *            the name of the XML element
+	 * @return a new entity, or <code>null</code> if the name is not supported
+	 */
+	private static APerson createEntity(String elementName) {
+		if ("person".equals(elementName)) {
+			return new Person();
+		}
+		if ("organization".equals(elementName)) {
+			return new Organization();
+		}
+		if ("location".equals(elementName)) {
+			return new Location();
+		}
+		return null;
+	}
+
+	/**
+	 * Converts an attribute value to an <code>int</code>.
+	 *
+	 * @param raw
+	 *            the attribute value, may be <code>null</code>
+	 * @return the parsed number, or {@link #UNKNOWN_OFFSET} if the value is
+	 *         <code>null</code> or not a valid integer
+	 */
+	private static int parseOffset(String raw) {
+		if (raw == null) {
+			return UNKNOWN_OFFSET;
+		}
+		try {
+			// Surrounding whitespace is tolerated; Integer.parseInt itself rejects it.
+			return Integer.parseInt(raw.trim());
+		} catch (NumberFormatException e) {
+			// Malformed values are treated like missing ones.
+			return UNKNOWN_OFFSET;
+		}
+	}
+
+}