annotate src/Test.java @ 2:1c2b4f5e2c05

linnaeus for finding species
author jdamerow
date Mon, 22 Oct 2012 13:46:54 -0700
parents 036535fcd179
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 import java.net.URI;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import javax.ws.rs.core.UriBuilder;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import com.sun.jersey.api.client.Client;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import com.sun.jersey.api.client.WebResource;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import com.sun.jersey.api.client.config.ClientConfig;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8 import com.sun.jersey.api.client.config.DefaultClientConfig;
036535fcd179 anteater
jdamerow
parents:
diff changeset
9
2
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
10 import de.mpiwg.anteater.species.common.impl.LinnaeusNameFinder;
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
11
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 public class Test {
036535fcd179 anteater
jdamerow
parents:
diff changeset
13
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 public static void main(String[] args) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 ClientConfig config = new DefaultClientConfig();
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 Client client = Client.create(config);
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 WebResource service = client.resource(getBaseURI2());
036535fcd179 anteater
jdamerow
parents:
diff changeset
18
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 // Sample input text (the XML service request further below is commented out)
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 String content = "We seek public review. Notice is hereby given that Paul E. Nachtigall, Ph.D., "
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 + "Director, Marine Mammal Research Program, Hawaii Institute of Marine "
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 + "Biology, University of Hawaii, P.O. Box 1106, Kailua, Hawaii 96734, "
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 + "has been issued a permit to conduct scientific research on three captive "
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 + "bottlenose dolphins (Tursiops truncatus) and one captive false killer whale "
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 + "(Pseudorca crassidens) for scientific research at the University of Hawaii.";
036535fcd179 anteater
jdamerow
parents:
diff changeset
26
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 String text = "A. Endangered Species. Applicant: 777 Ranch, Inc. Hondo TX; PRT-013008. The applicant requests renewal of their permit authorizing interstate and foreign commerce, export and cull of excess male barasingha (Cervus duvauceli), Eld's deer (Cervus eldi), Arabian oryx and red lechwe (Kobus leche) from their captive herd for the purpose of enhancement of the survival of the species. This notification covers activities conducted by the applicant over a 5-year period. Applicant: Lincoln Park Zoological Gardens, Chicago, IL; PRT-090113</HD>";
036535fcd179 anteater
jdamerow
parents:
diff changeset
28
2
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
29 LinnaeusNameFinder finder = new LinnaeusNameFinder(null);
1c2b4f5e2c05 linnaeus for finding species
jdamerow
parents: 0
diff changeset
30 finder.findCommonNames(text);
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
31
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 // MultivaluedMap<String, String> values = new MultivaluedMapImpl();
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 // values.add("documentContent", content);
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 // values.add("documentType", "text/plain");
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 // values.add("appid",
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 // "nV7rNc_V34F5EjG5VjBaFxIxpwxemBhUa0h4N2To8CnfjVBBmLrTVOgaCsXXU7EP");
036535fcd179 anteater
jdamerow
parents:
diff changeset
37
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 // MultivaluedMap<String, String> values = new MultivaluedMapImpl();
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 // values.add("text", content);
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 // String result =
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 // service.accept(MediaType.TEXT_XML).post(String.class, values);
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 // String result = service.accept(MediaType.TEXT_XML).get(String.class);
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 // System.out.println(result);
036535fcd179 anteater
jdamerow
parents:
diff changeset
46
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 // Test NER (named-entity recognition)
036535fcd179 anteater
jdamerow
parents:
diff changeset
48 // String serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 // AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier
036535fcd179 anteater
jdamerow
parents:
diff changeset
52 // .getClassifierNoExceptions(serializedClassifier);
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 // String text = "Notice is hereby given that <PERSON>Paul E. Nachtigall Paul Szelag</PERSON>, Ph.D., Director, <ORGANIZATION>Marine Mammal Research Program</ORGANIZATION>, <ORGANIZATION>Hawaii Institute of Marine Biology</ORGANIZATION>, University of <LOCATION>Hawaii</LOCATION>, P.O. Box 1106, <LOCATION>Kailua</LOCATION>, <LOCATION>Hawaii</LOCATION> 96734, has been issued a permit to conduct scientific research on three captive bottlenose dolphins (Tursiops truncatus) and one captive false killer whale (Pseudorca crassidens) for scientific research at the <ORGANIZATION>University of Hawaii</ORGANIZATION>.";
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 // //classifier.classifyWithInlineXML(content);
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 // System.out.println(text);
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 // Pattern pattern = Pattern.compile("\\<[A-Z]+\\>.+?\\</[A-Z]+\\>");
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 // Matcher matcher = pattern.matcher(text);
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 // while (matcher.find()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 // System.out.print("Start index: " + matcher.start());
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 // System.out.print(" End index: " + matcher.end() + " ");
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 // System.out.println(matcher.group());
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
64 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 // StanfordNLPTextParser p = new StanfordNLPTextParser();
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 // p.getSubjects(content);
036535fcd179 anteater
jdamerow
parents:
diff changeset
67
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 // for (int i = 0; i < 5; i++) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 // System.out.println(i);
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 // try {
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 // Thread.sleep(1000);
036535fcd179 anteater
jdamerow
parents:
diff changeset
72 // } catch (InterruptedException e) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 // // TODO Auto-generated catch block
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 // e.printStackTrace();
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
77
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 // String xml = "<hallo><a>xxx</a><a><b>2</b>abc</a><c/></hallo>";
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 //
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 // JDOMParser p = new JDOMParser(xml, false);
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 // System.out.println(p.executeXPath("/hallo/a/b[text()=2]"));
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
83
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 private static URI getBaseURI2() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 return UriBuilder.fromUri("http://wherein.yahooapis.com/").path("v1")
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 .path("document").build();
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
88
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 private static URI getBaseURI() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 return UriBuilder.fromUri("http://gnrd.globalnames.org/")
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 .path("name_finder.xml").build();
036535fcd179 anteater
jdamerow
parents:
diff changeset
92 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
93
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 private static URI getBaseURIToken() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
95 return UriBuilder
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 .fromUri(
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 "http://gnrd.globalnames.org/name_finder.xml?token=Eo6PwCuWRl6owxBiHp1g2g")
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 .build();
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
100
036535fcd179 anteater
jdamerow
parents:
diff changeset
101 }