0
|
1 package de.mpiwg.anteater.events.processors;
|
|
2
|
|
3 import java.util.ArrayList;
|
|
4 import java.util.List;
|
|
5 import java.util.regex.Matcher;
|
|
6 import java.util.regex.Pattern;
|
|
7
|
|
8 import de.mpiwg.anteater.events.Applicant;
|
|
9 import de.mpiwg.anteater.events.ResearchEvent;
|
|
10 import de.mpiwg.anteater.ml.PlaceClasses;
|
|
11 import de.mpiwg.anteater.results.ApplicantResult;
|
|
12 import de.mpiwg.anteater.results.LocationResult;
|
|
13 import de.mpiwg.anteater.results.ResultsCarrier;
|
|
14 import de.mpiwg.anteater.results.SpeciesScientificResult;
|
|
15 import de.mpiwg.anteater.text.Paragraph;
|
|
16 import de.mpiwg.anteater.text.TextInformation;
|
|
17 import de.mpiwg.anteater.text.TextPart;
|
|
18
|
|
19 public class PermitOrApplicantEventProcessor extends AEventProcessor {
|
|
20
|
|
21 private String applicationNrPattern = "No. ([A-Z0-9\\-]{2,})";
|
|
22
|
|
23 @Override
|
|
24 public void processEvents(List<ResearchEvent> events, ResultsCarrier carrier) {
|
|
25 TextInformation info = carrier.getTextInfo();
|
|
26
|
|
27 // find how many distinct applicants there are
|
|
28 List<ApplicantResult> distinctApplicants = getDistinctApplicants(carrier);
|
|
29
|
|
30 if (distinctApplicants.size() == 1) {
|
|
31 return;
|
|
32 }
|
|
33
|
|
34 // if there are several applicants start new event with each applicant
|
|
35 List<ApplicantResult> appsInSummary = new ArrayList<ApplicantResult>();
|
|
36 List<ApplicantResult> appsInSuppleInf = new ArrayList<ApplicantResult>();
|
|
37
|
|
38 sortByTextType(distinctApplicants, appsInSummary, appsInSuppleInf);
|
|
39
|
|
40 if (appsInSummary.isEmpty()) {
|
|
41 return;
|
|
42 }
|
|
43
|
|
44 List<ResearchEvent> newEvents = new ArrayList<ResearchEvent>();
|
|
45
|
|
46 List<SpeciesScientificResult> speciesInSummary = new ArrayList<SpeciesScientificResult>();
|
|
47 List<SpeciesScientificResult> speciesInSuppleInf = new ArrayList<SpeciesScientificResult>();
|
|
48 sortSpeciesByTextType(carrier.getSpeciesResults(), speciesInSummary, speciesInSuppleInf);
|
|
49
|
|
50 List<LocationResult> locationsInSummary = new ArrayList<LocationResult>();
|
|
51 List<LocationResult> locationsInSuppleInf = new ArrayList<LocationResult>();
|
|
52 sortLocsByTextType(carrier.getLocationResults(), locationsInSummary, locationsInSuppleInf);
|
|
53
|
|
54
|
|
55 ResearchEvent event = null;
|
|
56 for (ApplicantResult appResult : appsInSummary) {
|
|
57 TextPart text = info.getSummaries().get(
|
|
58 appResult.getResult().getTextIdx());
|
|
59
|
|
60 int startSearchForNumberAt = appResult.getFinding().getStart()
|
|
61 + appResult.getFinding().getLength();
|
|
62
|
|
63 int endSearchForNumber = text.getText().length();
|
|
64 if (appsInSummary.indexOf(appResult) < appsInSummary.size() - 1)
|
|
65 endSearchForNumber = appsInSummary
|
|
66 .get(appsInSummary.indexOf(appResult) + 1).getFinding()
|
|
67 .getStart();
|
|
68
|
|
69 String textAsString = text.getText();
|
|
70 String textAfterApplicant = textAsString.substring(
|
|
71 startSearchForNumberAt, endSearchForNumber);
|
|
72
|
|
73 // check if there is an application number
|
|
74 Pattern pattern = Pattern.compile(applicationNrPattern);
|
|
75 Matcher matcher = pattern.matcher(textAfterApplicant);
|
|
76
|
|
77 List<LocationResult> locationsForApp = new ArrayList<LocationResult>();
|
|
78 // find all locations between current applicant and next one
|
|
79 for (LocationResult locationResult : locationsInSummary) {
|
|
80 if (locationResult.getFinding().getStart() >= startSearchForNumberAt
|
|
81 && locationResult.getFinding().getStart() < endSearchForNumber)
|
|
82 locationsForApp.add(locationResult);
|
|
83 }
|
|
84
|
|
85 List<SpeciesScientificResult> speciesForApp = new ArrayList<SpeciesScientificResult>();
|
|
86 // find all species between current applicant and next one
|
|
87 for (SpeciesScientificResult speciesResult : speciesInSummary) {
|
|
88 if (speciesResult.getFinding().getStart() > startSearchForNumberAt
|
|
89 && speciesResult.getFinding().getStart() < endSearchForNumber)
|
|
90 speciesForApp.add(speciesResult);
|
|
91 }
|
|
92
|
|
93 if (event != null) {
|
|
94 List<Applicant> applicantsInEvent = event.getApplicants();
|
|
95 List<Applicant> applicantsWithoutLoc = new ArrayList<Applicant>();
|
|
96
|
|
97 Applicant newApplicant = createApplicant(appResult);
|
|
98 event.getApplicants().add(newApplicant);
|
|
99
|
|
100 for (Applicant applicantInEvent : applicantsInEvent)
|
|
101 if (applicantInEvent.getApplicantInstitution().isEmpty()
|
|
102 && applicantInEvent.getLocation().isEmpty())
|
|
103 applicantsWithoutLoc.add(applicantInEvent);
|
|
104
|
|
105 setLocations(applicantsWithoutLoc, locationsForApp, event);
|
|
106 setSpecies(speciesForApp, event);
|
|
107 }
|
|
108
|
|
109 if (event == null)
|
|
110 event = createEvent(appResult, info, locationsForApp,
|
|
111 speciesForApp);
|
|
112
|
|
113 if (matcher.find()) {
|
|
114 event.setApplicationOrPermitNo(matcher.group(1));
|
|
115 newEvents.add(event);
|
|
116 event = null;
|
|
117 }
|
|
118 }
|
|
119
|
|
120 ResearchEvent eventForPara = null;
|
|
121 for (TextPart text : info.getSupplInfos()) {
|
|
122 for (Paragraph para : text.getParagraphs()) {
|
|
123 int startOfPara = text.getPositionInTextFromParagraph(para, 0);
|
|
124 int endOfPara = startOfPara + para.getParagraphText().length();
|
|
125
|
|
126 Pattern pattern = Pattern.compile(applicationNrPattern);
|
|
127 Matcher matcher = pattern.matcher(para.getParagraphText());
|
|
128
|
|
129 IfStatement: if (matcher.find()) {
|
|
130 String numberInPara = matcher.group(1);
|
|
131 for (ResearchEvent ev : newEvents) {
|
|
132 if (numberInPara.equals(ev.getApplicationOrPermitNo()))
|
|
133 {
|
|
134 eventForPara = ev;
|
|
135 break IfStatement;
|
|
136 }
|
|
137 }
|
|
138 eventForPara = null;
|
|
139 continue;
|
|
140 }
|
|
141
|
|
142 if (eventForPara == null)
|
|
143 continue;
|
|
144
|
|
145 for (LocationResult loc : locationsInSuppleInf) {
|
|
146 if (info.getSupplInfos().indexOf(text) != loc.getResult().getTextIdx()) {
|
|
147 continue;
|
|
148 }
|
|
149
|
|
150 if (loc.getFinding().getStart() >= startOfPara && loc.getFinding().getStart() < endOfPara) {
|
|
151 if (loc.getPrediction() == PlaceClasses.RESEARCH_LOCATION)
|
|
152 eventForPara.getResearchLocations().add(createLocation(loc));
|
|
153 }
|
|
154 }
|
|
155
|
|
156 for (SpeciesScientificResult spec : speciesInSuppleInf) {
|
|
157 if (info.getSupplInfos().indexOf(text) != spec.getResult().getTextIdx()) {
|
|
158 continue;
|
|
159 }
|
|
160
|
|
161 if (spec.getFinding().getStart() >= startOfPara && spec.getFinding().getStart() < endOfPara) {
|
|
162 eventForPara.getResearchedSpecies().add(createSpecies(spec));
|
|
163 }
|
|
164 }
|
|
165 }
|
|
166 }
|
|
167
|
|
168 events.addAll(newEvents);
|
|
169 }
|
|
170
|
|
171 @Override
|
|
172 public int getRank() {
|
|
173 return 10;
|
|
174 }
|
|
175
|
|
176 }
|