0
|
1 import java.io.IOException;
|
|
2 import java.util.List;
|
|
3
|
|
4 import edu.stanford.nlp.ie.AbstractSequenceClassifier;
|
|
5 import edu.stanford.nlp.ie.crf.CRFClassifier;
|
|
6 import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
|
|
7 import edu.stanford.nlp.ling.CoreLabel;
|
|
8
|
|
9
|
|
10
|
|
11 /** This is a demo of calling CRFClassifier programmatically.
|
|
12 * <p>
|
|
13 * Usage: <code> java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]]</code>
|
|
14 * <p>
|
|
15 * If arguments aren't specified, they default to
|
|
16 * ner-eng-ie.crf-3-all2006.ser.gz and some hardcoded sample text.
|
|
17 * <p>
|
|
18 * To use CRFClassifier from the command line:
|
|
19 * java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
|
|
20 * [classifier] -textFile [file]
|
|
21 * Or if the file is already tokenized and one word per line, perhaps in
|
|
22 * a tab-separated value format with extra columns for part-of-speech tag,
|
|
23 * etc., use the version below (note the 's' instead of the 'x'):
|
|
24 * java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
|
|
25 * [classifier] -testFile [file]
|
|
26 *
|
|
27 * @author Jenny Finkel
|
|
28 * @author Christopher Manning
|
|
29 */
|
|
30
|
|
31 public class NERDemo {
|
|
32
|
|
33 public static void main(String[] args) throws IOException {
|
|
34
|
|
35 String serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz";
|
|
36
|
|
37 if (args.length > 0) {
|
|
38 serializedClassifier = args[0];
|
|
39 }
|
|
40
|
|
41 AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(serializedClassifier);
|
|
42
|
|
43 /* For either a file to annotate or for the hardcoded text example,
|
|
44 this demo file shows two ways to process the output, for teaching
|
|
45 purposes. For the file, it shows both how to run NER on a String
|
|
46 and how to run it on a whole file. For the hard-coded String,
|
|
47 it shows how to run it on a single sentence, and how to do this
|
|
48 and produce an inline XML output format.
|
|
49 */
|
|
50 String content = "Notice is hereby given that Paul E. Nachtigall Paul Szelag, Ph.D., "
|
|
51 + "Director, Marine Mammal Research Program, Hawaii Institute of Marine "
|
|
52 + "Biology, University of Hawaii, P.O. Box 1106, Kailua, Hawaii 96734, "
|
|
53 + "has been issued a permit to conduct scientific research on three captive "
|
|
54 + "bottlenose dolphins (Tursiops truncatus) and one captive false killer whale "
|
|
55 + "(Pseudorca crassidens) for scientific research at the University of Hawaii.";
|
|
56
|
|
57 String fileContents = content; //IOUtils.slurpFile(args[1]);
|
|
58 List<List<CoreLabel>> out = classifier.classify(fileContents);
|
|
59 for (List<CoreLabel> sentence : out) {
|
|
60 for (CoreLabel word : sentence) {
|
|
61 System.out.print(word.word() + '/' + word.get(AnswerAnnotation.class) + ' ');
|
|
62 }
|
|
63 System.out.println();
|
|
64 }
|
|
65 // out = classifier.classifyFile(args[1]);
|
|
66 // for (List<CoreLabel> sentence : out) {
|
|
67 // for (CoreLabel word : sentence) {
|
|
68 // System.out.print(word.word() + '/' + word.get(AnswerAnnotation.class) + ' ');
|
|
69 // }
|
|
70 // System.out.println();
|
|
71
|
|
72
|
|
73 // } else {
|
|
74 // String s1 = "Good afternoon Rajat Raina, how are you today?";
|
|
75 // String s2 = "I go to school at Stanford University, which is located in California.";
|
|
76 // System.out.println(classifier.classifyToString(s1));
|
|
77 // System.out.println(classifier.classifyWithInlineXML(s2));
|
|
78 // System.out.println(classifier.classifyToString(s2, "xml", true));
|
|
79 // int i=0;
|
|
80 // for (List<CoreLabel> lcl : classifier.classify(s2)) {
|
|
81 // for (CoreLabel cl : lcl) {
|
|
82 // System.out.println(i++ + ":");
|
|
83 // System.out.println(cl);
|
|
84 // }
|
|
85 // }
|
|
86 // }
|
|
87 }
|
|
88
|
|
89 }
|