Mercurial > hg > anteater
comparison src/de/mpiwg/anteater/events/processors/PermitOrApplicantEventProcessor.java @ 1:7a4341c9f2e5
checking permit numbers for similarity if no direct match
author | jdamerow |
---|---|
date | Fri, 05 Oct 2012 18:52:14 -0700 |
parents | 036535fcd179 |
children |
comparison
equal
deleted
inserted
replaced
0:036535fcd179 | 1:7a4341c9f2e5 |
---|---|
2 | 2 |
3 import java.util.ArrayList; | 3 import java.util.ArrayList; |
4 import java.util.List; | 4 import java.util.List; |
5 import java.util.regex.Matcher; | 5 import java.util.regex.Matcher; |
6 import java.util.regex.Pattern; | 6 import java.util.regex.Pattern; |
7 | |
8 import uk.ac.shef.wit.simmetrics.similaritymetrics.JaroWinkler; | |
7 | 9 |
8 import de.mpiwg.anteater.events.Applicant; | 10 import de.mpiwg.anteater.events.Applicant; |
9 import de.mpiwg.anteater.events.ResearchEvent; | 11 import de.mpiwg.anteater.events.ResearchEvent; |
10 import de.mpiwg.anteater.ml.PlaceClasses; | 12 import de.mpiwg.anteater.ml.PlaceClasses; |
11 import de.mpiwg.anteater.results.ApplicantResult; | 13 import de.mpiwg.anteater.results.ApplicantResult; |
126 Pattern pattern = Pattern.compile(applicationNrPattern); | 128 Pattern pattern = Pattern.compile(applicationNrPattern); |
127 Matcher matcher = pattern.matcher(para.getParagraphText()); | 129 Matcher matcher = pattern.matcher(para.getParagraphText()); |
128 | 130 |
129 IfStatement: if (matcher.find()) { | 131 IfStatement: if (matcher.find()) { |
130 String numberInPara = matcher.group(1); | 132 String numberInPara = matcher.group(1); |
133 // check if found number exists in events | |
131 for (ResearchEvent ev : newEvents) { | 134 for (ResearchEvent ev : newEvents) { |
132 if (numberInPara.equals(ev.getApplicationOrPermitNo())) | 135 if (numberInPara.equals(ev.getApplicationOrPermitNo())) |
136 { | |
137 eventForPara = ev; | |
138 break IfStatement; | |
139 } | |
140 } | |
141 // if there is no event with permit number | |
142 // check if just something was clipped | |
143 for (ResearchEvent ev : newEvents) { | |
144 if (ev.getApplicationOrPermitNo().contains(numberInPara) || numberInPara.contains(ev.getApplicationOrPermitNo())) { | |
145 eventForPara = ev; | |
146 break IfStatement; | |
147 } | |
148 } | |
149 // if there is still no event found | |
150 // check for switched numbers | |
151 for (ResearchEvent ev : newEvents) { | |
152 JaroWinkler winkler = new JaroWinkler(); | |
153 double sim = winkler.getSimilarity(numberInPara, ev.getApplicationOrPermitNo()); | |
154 if (sim > 0.85) | |
133 { | 155 { |
134 eventForPara = ev; | 156 eventForPara = ev; |
135 break IfStatement; | 157 break IfStatement; |
136 } | 158 } |
137 } | 159 } |