0
|
1 package de.mpiwg.anteater.xml.impl;
|
|
2
|
|
3 import java.io.InputStream;
|
|
4 import java.util.ArrayList;
|
|
5 import java.util.List;
|
|
6
|
|
7 import org.jdom2.Element;
|
|
8
|
|
9 import de.mpiwg.anteater.persons.APerson;
|
|
10 import de.mpiwg.anteater.persons.Location;
|
|
11 import de.mpiwg.anteater.persons.Organization;
|
|
12 import de.mpiwg.anteater.persons.Person;
|
|
13 import de.mpiwg.anteater.xml.IPersonFinderParser;
|
|
14
|
|
15 public class StanfordNERXMLParser extends JDOMParser implements IPersonFinderParser {
|
|
16
|
|
17 public StanfordNERXMLParser(String content) {
|
|
18 super(content, false);
|
|
19 }
|
|
20
|
|
21 public StanfordNERXMLParser(InputStream stream) {
|
|
22 super(stream);
|
|
23 }
|
|
24
|
|
25
|
|
26 @Override
|
|
27 public List<APerson> parsePersons() {
|
|
28 List<Element> stanfordNode = executeXPath("/stanford_ner", null);
|
|
29 List<APerson> persons = new ArrayList<APerson>();
|
|
30
|
|
31 if (stanfordNode == null || stanfordNode.isEmpty())
|
|
32 return persons;
|
|
33
|
|
34 List<Element> children = stanfordNode.get(0).getChildren();
|
|
35
|
|
36 for (Element pNode : children) {
|
|
37 APerson person;
|
|
38 if (pNode.getName().equals("person"))
|
|
39 person = new Person();
|
|
40 else if (pNode.getName().equals("organization"))
|
|
41 person = new Organization();
|
|
42 else if (pNode.getName().equals("location"))
|
|
43 person = new Location();
|
|
44 else
|
|
45 continue;
|
|
46
|
|
47 person.setReferenceInText(pNode.getText());
|
|
48 person.setStart(pNode.getAttributeValue("start") != null ? new Integer(pNode.getAttributeValue("start")) : -1);
|
|
49 person.setLength(pNode.getAttributeValue("length") != null ? new Integer(pNode.getAttributeValue("length")) : -1);
|
|
50
|
|
51 persons.add(person);
|
|
52 }
|
|
53
|
|
54 return persons;
|
|
55 }
|
|
56
|
|
57 }
|