annotate src/NERDemo.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 import java.io.IOException;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
3
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import edu.stanford.nlp.ie.AbstractSequenceClassifier;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import edu.stanford.nlp.ie.crf.CRFClassifier;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import edu.stanford.nlp.ling.CoreLabel;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8
036535fcd179 anteater
jdamerow
parents:
diff changeset
9
036535fcd179 anteater
jdamerow
parents:
diff changeset
10
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 /** This is a demo of calling CRFClassifier programmatically.
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 * <p>
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 * Usage: <code> java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]]</code>
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 * <p>
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 * If serializedClassifier isn't specified, it defaults to
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 * classifiers/english.all.3class.distsim.crf.ser.gz; fileName is currently ignored and the hardcoded sample text is always used.
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 * <p>
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 * To use CRFClassifier from the command line:
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 * java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 * [classifier] -textFile [file]
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 * Or if the file is already tokenized and one word per line, perhaps in
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 * a tab-separated value format with extra columns for part-of-speech tag,
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 * etc., use the version below (note -testFile with an 's' instead of -textFile with an 'x'):
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 * java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 * [classifier] -testFile [file]
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 *
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 * @author Jenny Finkel
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 * @author Christopher Manning
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 */
036535fcd179 anteater
jdamerow
parents:
diff changeset
30
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 public class NERDemo {
036535fcd179 anteater
jdamerow
parents:
diff changeset
32
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 public static void main(String[] args) throws IOException {
036535fcd179 anteater
jdamerow
parents:
diff changeset
34
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 String serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
036535fcd179 anteater
jdamerow
parents:
diff changeset
36
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 if (args.length > 0) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 serializedClassifier = args[0];
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
40
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(serializedClassifier);
036535fcd179 anteater
jdamerow
parents:
diff changeset
42
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 /* For either a file to annotate or for the hardcoded text example,
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 this demo file shows two ways to process the output, for teaching
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 purposes. For the file, it shows both how to run NER on a String
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 and how to run it on a whole file. For the hard-coded String,
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 it shows how to run it on a single sentence, and how to do this
036535fcd179 anteater
jdamerow
parents:
diff changeset
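48 and produce an inline XML output format. NOTE: currently only classify() on the hardcoded text is active; the classifyFile and inline-XML examples are commented out below.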
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 */
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 String content = "Notice is hereby given that Paul E. Nachtigall Paul Szelag, Ph.D., "
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 + "Director, Marine Mammal Research Program, Hawaii Institute of Marine "
036535fcd179 anteater
jdamerow
parents:
diff changeset
52 + "Biology, University of Hawaii, P.O. Box 1106, Kailua, Hawaii 96734, "
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 + "has been issued a permit to conduct scientific research on three captive "
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 + "bottlenose dolphins (Tursiops truncatus) and one captive false killer whale "
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 + "(Pseudorca crassidens) for scientific research at the University of Hawaii.";
036535fcd179 anteater
jdamerow
parents:
diff changeset
56
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 String fileContents = content; //IOUtils.slurpFile(args[1]);
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 List<List<CoreLabel>> out = classifier.classify(fileContents);
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 for (List<CoreLabel> sentence : out) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
60 for (CoreLabel word : sentence) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 System.out.print(word.word() + '/' + word.get(AnswerAnnotation.class) + ' ');
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 System.out.println();
036535fcd179 anteater
jdamerow
parents:
diff changeset
64 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 // out = classifier.classifyFile(args[1]);
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 // for (List<CoreLabel> sentence : out) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 // for (CoreLabel word : sentence) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 // System.out.print(word.word() + '/' + word.get(AnswerAnnotation.class) + ' ');
036535fcd179 anteater
jdamerow
parents:
diff changeset
69 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 // System.out.println();
036535fcd179 anteater
jdamerow
parents:
diff changeset
71
036535fcd179 anteater
jdamerow
parents:
diff changeset
72
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 // } else {
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 // String s1 = "Good afternoon Rajat Raina, how are you today?";
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 // String s2 = "I go to school at Stanford University, which is located in California.";
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 // System.out.println(classifier.classifyToString(s1));
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 // System.out.println(classifier.classifyWithInlineXML(s2));
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 // System.out.println(classifier.classifyToString(s2, "xml", true));
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 // int i=0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
80 // for (List<CoreLabel> lcl : classifier.classify(s2)) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 // for (CoreLabel cl : lcl) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 // System.out.println(i++ + ":");
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 // System.out.println(cl);
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 // }
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
88
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 }