Mercurial > hg > anteater
diff src/de/mpiwg/anteater/events/processors/PermitOrApplicantEventProcessor.java @ 1:7a4341c9f2e5
checking permit numbers for similarity if no direct match
author | jdamerow |
---|---|
date | Fri, 05 Oct 2012 18:52:14 -0700 |
parents | 036535fcd179 |
children |
line wrap: on
line diff
--- a/src/de/mpiwg/anteater/events/processors/PermitOrApplicantEventProcessor.java Fri Sep 14 10:30:43 2012 +0200
+++ b/src/de/mpiwg/anteater/events/processors/PermitOrApplicantEventProcessor.java Fri Oct 05 18:52:14 2012 -0700
@@ -5,6 +5,8 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import uk.ac.shef.wit.simmetrics.similaritymetrics.JaroWinkler;
+
 import de.mpiwg.anteater.events.Applicant;
 import de.mpiwg.anteater.events.ResearchEvent;
 import de.mpiwg.anteater.ml.PlaceClasses;
@@ -128,6 +130,7 @@
 IfStatement: if (matcher.find()) {
 String numberInPara = matcher.group(1);
+ // check if found number exisits in events
 for (ResearchEvent ev : newEvents) {
 if (numberInPara.equals(ev.getApplicationOrPermitNo())) {
@@ -135,6 +138,25 @@
 break IfStatement;
 }
 }
+ // if there is no event with permit number
+ // check if just something was clipped
+ for (ResearchEvent ev : newEvents) {
+ if (ev.getApplicationOrPermitNo().contains(numberInPara) || numberInPara.contains(ev.getApplicationOrPermitNo())) {
+ eventForPara = ev;
+ break IfStatement;
+ }
+ }
+ // if there is still no event found
+ // check for switched numbers
+ for (ResearchEvent ev : newEvents) {
+ JaroWinkler winkler = new JaroWinkler();
+ double sim = winkler.getSimilarity(numberInPara, ev.getApplicationOrPermitNo());
+ if (sim > 0.85)
+ {
+ eventForPara = ev;
+ break IfStatement;
+ }
+ }
 eventForPara = null;
 continue;
 }