Mercurial > hg > anteater
diff src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java @ 4:dcc35f89dce3
include Linnaeus findings
author | jdamerow |
---|---|
date | Thu, 25 Oct 2012 15:25:08 -0700 |
parents | ae96e4bc7fb2 |
children |
line wrap: on
line diff
--- a/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java Mon Oct 22 14:21:14 2012 -0700 +++ b/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java Thu Oct 25 15:25:08 2012 -0700 @@ -44,20 +44,35 @@ matcher.match("test", new Document("none", null, null, null, null, null, null, null, null, null, null, null, null, null, null)); - Document doc = new Document("id", "title", "", text, text, Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, null, "", "", "", "", null); - TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc); - List<Mention> species = tagged.getAllMatches(); + StringBuffer sb = new StringBuffer(); sb.append("<linnaeus>"); - + + Document doc = new Document("", "", "", "", text, + Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, + null, "", "", "", "", null); + TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc); + List<Mention> species = tagged.getAllMatches(); + for (Mention s : species) { - sb.append("<species id=\"" + StringEscapeUtils.escapeXml(s.getMostProbableID()) + "\" "); - sb.append("start=\"" + s.getStart() + "\" "); - sb.append("end=\"" + s.getEnd() + "\" "); - sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText()) + "\" "); + + int startAt = s.getStart() - s.getText().length() > -1 ? s + .getStart() - s.getText().length() : 0; + String stub = text.substring(startAt); + + int foundAt = stub.indexOf(s.getText()); + + sb.append("<species id=\"" + + StringEscapeUtils.escapeXml(s.getMostProbableID()) + + "\" "); + sb.append("start=\"" + (startAt + foundAt) + "\" "); + sb.append("end=\"" + (startAt + foundAt + s.getText().length()) + + "\" "); + sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText()) + + "\" "); sb.append("/>"); } - + sb.append("</linnaeus>"); System.out.println(sb.toString()); return sb.toString();