annotate src/de/mpiwg/anteater/xml/impl/StanfordNERXMLParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.xml.impl;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.io.InputStream;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.ArrayList;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import org.jdom2.Element;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.persons.APerson;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.persons.Location;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.persons.Organization;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.persons.Person;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.xml.IPersonFinderParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 public class StanfordNERXMLParser extends JDOMParser implements IPersonFinderParser {
036535fcd179 anteater
jdamerow
parents:
diff changeset
16
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 public StanfordNERXMLParser(String content) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 super(content, false);
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
20
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 public StanfordNERXMLParser(InputStream stream) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 super(stream);
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
24
036535fcd179 anteater
jdamerow
parents:
diff changeset
25
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 @Override
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 public List<APerson> parsePersons() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 List<Element> stanfordNode = executeXPath("/stanford_ner", null);
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 List<APerson> persons = new ArrayList<APerson>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
30
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 if (stanfordNode == null || stanfordNode.isEmpty())
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 return persons;
036535fcd179 anteater
jdamerow
parents:
diff changeset
33
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 List<Element> children = stanfordNode.get(0).getChildren();
036535fcd179 anteater
jdamerow
parents:
diff changeset
35
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 for (Element pNode : children) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 APerson person;
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 if (pNode.getName().equals("person"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 person = new Person();
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 else if (pNode.getName().equals("organization"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 person = new Organization();
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 else if (pNode.getName().equals("location"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 person = new Location();
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 else
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 continue;
036535fcd179 anteater
jdamerow
parents:
diff changeset
46
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 person.setReferenceInText(pNode.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 person.setStart(pNode.getAttributeValue("start") != null ? new Integer(pNode.getAttributeValue("start")) : -1);
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 person.setLength(pNode.getAttributeValue("length") != null ? new Integer(pNode.getAttributeValue("length")) : -1);
036535fcd179 anteater
jdamerow
parents:
diff changeset
50
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 persons.add(person);
036535fcd179 anteater
jdamerow
parents:
diff changeset
52 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
53
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 return persons;
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
56
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 }