Mercurial > hg > anteater
comparison src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java @ 4:dcc35f89dce3
include linnaeus findings
author | jdamerow |
---|---|
date | Thu, 25 Oct 2012 15:25:08 -0700 |
parents | ae96e4bc7fb2 |
children |
comparison
equal
deleted
inserted
replaced
3:ae96e4bc7fb2 | 4:dcc35f89dce3 |
---|---|
42 getPostprocessor(new HashMap<String, String>(), "")); | 42 getPostprocessor(new HashMap<String, String>(), "")); |
43 | 43 |
44 matcher.match("test", new Document("none", null, null, null, null, | 44 matcher.match("test", new Document("none", null, null, null, null, |
45 null, null, null, null, null, null, null, null, null, null)); | 45 null, null, null, null, null, null, null, null, null, null)); |
46 | 46 |
47 Document doc = new Document("id", "title", "", text, text, Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, null, "", "", "", "", null); | 47 |
48 StringBuffer sb = new StringBuffer(); | |
49 sb.append("<linnaeus>"); | |
50 | |
51 Document doc = new Document("", "", "", "", text, | |
52 Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, | |
53 null, "", "", "", "", null); | |
48 TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc); | 54 TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc); |
49 List<Mention> species = tagged.getAllMatches(); | 55 List<Mention> species = tagged.getAllMatches(); |
50 StringBuffer sb = new StringBuffer(); | 56 |
51 sb.append("<linnaeus>"); | |
52 | |
53 for (Mention s : species) { | 57 for (Mention s : species) { |
54 sb.append("<species id=\"" + StringEscapeUtils.escapeXml(s.getMostProbableID()) + "\" "); | 58 |
55 sb.append("start=\"" + s.getStart() + "\" "); | 59 int startAt = s.getStart() - s.getText().length() > -1 ? s |
56 sb.append("end=\"" + s.getEnd() + "\" "); | 60 .getStart() - s.getText().length() : 0; |
57 sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText()) + "\" "); | 61 String stub = text.substring(startAt); |
62 | |
63 int foundAt = stub.indexOf(s.getText()); | |
64 | |
65 sb.append("<species id=\"" | |
66 + StringEscapeUtils.escapeXml(s.getMostProbableID()) | |
67 + "\" "); | |
68 sb.append("start=\"" + (startAt + foundAt) + "\" "); | |
69 sb.append("end=\"" + (startAt + foundAt + s.getText().length()) | |
70 + "\" "); | |
71 sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText()) | |
72 + "\" "); | |
58 sb.append("/>"); | 73 sb.append("/>"); |
59 } | 74 } |
60 | 75 |
61 sb.append("</linnaeus>"); | 76 sb.append("</linnaeus>"); |
62 System.out.println(sb.toString()); | 77 System.out.println(sb.toString()); |
63 return sb.toString(); | 78 return sb.toString(); |
64 } | 79 } |
65 | 80 |