Mercurial > hg > anteater
diff src/NERDemo.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/NERDemo.java Fri Sep 14 10:30:43 2012 +0200 @@ -0,0 +1,89 @@ +import java.io.IOException; +import java.util.List; + +import edu.stanford.nlp.ie.AbstractSequenceClassifier; +import edu.stanford.nlp.ie.crf.CRFClassifier; +import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation; +import edu.stanford.nlp.ling.CoreLabel; + + + +/** This is a demo of calling CRFClassifier programmatically. + * <p> + * Usage: <code> java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]]</code> + * <p> + * If arguments aren't specified, they default to + * ner-eng-ie.crf-3-all2006.ser.gz and some hardcoded sample text. + * <p> + * To use CRFClassifier from the command line: + * java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier + * [classifier] -textFile [file] + * Or if the file is already tokenized and one word per line, perhaps in + * a tab-separated value format with extra columns for part-of-speech tag, + * etc., use the version below (note the 's' instead of the 'x'): + * java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier + * [classifier] -testFile [file] + * + * @author Jenny Finkel + * @author Christopher Manning + */ + +public class NERDemo { + + public static void main(String[] args) throws IOException { + + String serializedClassifier = "classifiers/english.all.3class.distsim.crf.ser.gz"; + + if (args.length > 0) { + serializedClassifier = args[0]; + } + + AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifierNoExceptions(serializedClassifier); + + /* For either a file to annotate or for the hardcoded text example, + this demo file shows two ways to process the output, for teaching + purposes. For the file, it shows both how to run NER on a String + and how to run it on a whole file. For the hard-coded String, + it shows how to run it on a single sentence, and how to do this + and produce an inline XML output format. + */ + String content = "Notice is hereby given that Paul E. Nachtigall Paul Szelag, Ph.D., " + + "Director, Marine Mammal Research Program, Hawaii Institute of Marine " + + "Biology, University of Hawaii, P.O. Box 1106, Kailua, Hawaii 96734, " + + "has been issued a permit to conduct scientific research on three captive " + + "bottlenose dolphins (Tursiops truncatus) and one captive false killer whale " + + "(Pseudorca crassidens) for scientific research at the University of Hawaii."; + + String fileContents = content; //IOUtils.slurpFile(args[1]); + List<List<CoreLabel>> out = classifier.classify(fileContents); + for (List<CoreLabel> sentence : out) { + for (CoreLabel word : sentence) { + System.out.print(word.word() + '/' + word.get(AnswerAnnotation.class) + ' '); + } + System.out.println(); + } +// out = classifier.classifyFile(args[1]); +// for (List<CoreLabel> sentence : out) { +// for (CoreLabel word : sentence) { +// System.out.print(word.word() + '/' + word.get(AnswerAnnotation.class) + ' '); +// } +// System.out.println(); + + +// } else { +// String s1 = "Good afternoon Rajat Raina, how are you today?"; +// String s2 = "I go to school at Stanford University, which is located in California."; +// System.out.println(classifier.classifyToString(s1)); +// System.out.println(classifier.classifyWithInlineXML(s2)); +// System.out.println(classifier.classifyToString(s2, "xml", true)); +// int i=0; +// for (List<CoreLabel> lcl : classifier.classify(s2)) { +// for (CoreLabel cl : lcl) { +// System.out.println(i++ + ":"); +// System.out.println(cl); +// } +// } +// } + } + +}