diff src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java @ 4:dcc35f89dce3

include Linnaeus findings
author jdamerow
date Thu, 25 Oct 2012 15:25:08 -0700
parents ae96e4bc7fb2
children
line wrap: on
line diff
--- a/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java	Mon Oct 22 14:21:14 2012 -0700
+++ b/src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java	Thu Oct 25 15:25:08 2012 -0700
@@ -44,20 +44,35 @@
 		matcher.match("test", new Document("none", null, null, null, null,
 				null, null, null, null, null, null, null, null, null, null));
 
-		Document doc = new Document("id", "title", "", text, text, Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, null, "", "", "", "", null);
-		TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc);
-		List<Mention> species = tagged.getAllMatches();
+		
 		StringBuffer sb = new StringBuffer();
 		sb.append("<linnaeus>");
-		
+
+		Document doc = new Document("", "", "", "", text,
+				Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER,
+				null, "", "", "", "", null);
+		TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc);
+		List<Mention> species = tagged.getAllMatches();
+
 		for (Mention s : species) {
-			sb.append("<species id=\"" + StringEscapeUtils.escapeXml(s.getMostProbableID()) + "\" ");
-			sb.append("start=\"" + s.getStart() + "\" ");
-			sb.append("end=\"" + s.getEnd() + "\" ");
-			sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText()) + "\" ");
+
+			int startAt = s.getStart() - s.getText().length() > -1 ? s
+					.getStart() - s.getText().length() : 0;
+			String stub = text.substring(startAt);
+
+			int foundAt = stub.indexOf(s.getText());
+
+			sb.append("<species id=\""
+					+ StringEscapeUtils.escapeXml(s.getMostProbableID())
+					+ "\" ");
+			sb.append("start=\"" + (startAt + foundAt) + "\" ");
+			sb.append("end=\"" + (startAt + foundAt + s.getText().length())
+					+ "\" ");
+			sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText())
+					+ "\" ");
 			sb.append("/>");
 		}
-		
+
 		sb.append("</linnaeus>");
 		System.out.println(sb.toString());
 		return sb.toString();