comparison src/de/mpiwg/anteater/species/common/impl/LinnaeusNameFinder.java @ 4:dcc35f89dce3

include Linnaeus findings
author jdamerow
date Thu, 25 Oct 2012 15:25:08 -0700
parents ae96e4bc7fb2
children
comparison
equal deleted inserted replaced
3:ae96e4bc7fb2 4:dcc35f89dce3
42 getPostprocessor(new HashMap<String, String>(), "")); 42 getPostprocessor(new HashMap<String, String>(), ""));
43 43
44 matcher.match("test", new Document("none", null, null, null, null, 44 matcher.match("test", new Document("none", null, null, null, null,
45 null, null, null, null, null, null, null, null, null, null)); 45 null, null, null, null, null, null, null, null, null, null));
46 46
47 Document doc = new Document("id", "title", "", text, text, Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER, null, "", "", "", "", null); 47
48 StringBuffer sb = new StringBuffer();
49 sb.append("<linnaeus>");
50
51 Document doc = new Document("", "", "", "", text,
52 Document.Text_raw_type.TEXT, "", null, Document.Type.OTHER,
53 null, "", "", "", "", null);
48 TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc); 54 TaggedDocument tagged = MatchOperations.matchDocument(matcher, doc);
49 List<Mention> species = tagged.getAllMatches(); 55 List<Mention> species = tagged.getAllMatches();
50 StringBuffer sb = new StringBuffer(); 56
51 sb.append("<linnaeus>");
52
53 for (Mention s : species) { 57 for (Mention s : species) {
54 sb.append("<species id=\"" + StringEscapeUtils.escapeXml(s.getMostProbableID()) + "\" "); 58
55 sb.append("start=\"" + s.getStart() + "\" "); 59 int startAt = s.getStart() - s.getText().length() > -1 ? s
56 sb.append("end=\"" + s.getEnd() + "\" "); 60 .getStart() - s.getText().length() : 0;
57 sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText()) + "\" "); 61 String stub = text.substring(startAt);
62
63 int foundAt = stub.indexOf(s.getText());
64
65 sb.append("<species id=\""
66 + StringEscapeUtils.escapeXml(s.getMostProbableID())
67 + "\" ");
68 sb.append("start=\"" + (startAt + foundAt) + "\" ");
69 sb.append("end=\"" + (startAt + foundAt + s.getText().length())
70 + "\" ");
71 sb.append("text=\"" + StringEscapeUtils.escapeXml(s.getText())
72 + "\" ");
58 sb.append("/>"); 73 sb.append("/>");
59 } 74 }
60 75
61 sb.append("</linnaeus>"); 76 sb.append("</linnaeus>");
62 System.out.println(sb.toString()); 77 System.out.println(sb.toString());
63 return sb.toString(); 78 return sb.toString();
64 } 79 }
65 80