diff src/NERDemo.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/NERDemo.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,89 @@
+import java.io.IOException;
+import java.util.List;
+
+import edu.stanford.nlp.ie.AbstractSequenceClassifier;
+import edu.stanford.nlp.ie.crf.CRFClassifier;
+import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
+import edu.stanford.nlp.ling.CoreLabel;
+
+
+
+/** This is a demo of calling CRFClassifier programmatically.
+ *  <p>
+ *  Usage: <code> java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]]</code>
+ *  <p>
+ *  If no classifier is specified, it defaults to classifiers/english.all.3class.distsim.crf.ser.gz.
+ *  The text to tag is always the hardcoded sample text; a fileName argument is currently ignored.
+ *  <p>
+ *  To use CRFClassifier from the command line:
+ *  java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
+ *      [classifier] -textFile [file]
+ *  Or if the file is already tokenized and one word per line, perhaps in
+ *  a tab-separated value format with extra columns for part-of-speech tag,
+ *  etc., use the version below (note the 's' instead of the 'x'):
+ *  java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
+ *      [classifier] -testFile [file]
+ *
+ *  @author Jenny Finkel
+ *  @author Christopher Manning
+ */
+
+public class NERDemo {
+
+    /** Classifier model loaded when no path is given on the command line. */
+    private static final String DEFAULT_CLASSIFIER =
+        "classifiers/english.all.3class.distsim.crf.ser.gz";
+
+    /** Hardcoded sample text that the demo tags. */
+    private static final String SAMPLE_TEXT =
+        "Notice is hereby given that Paul E. Nachtigall Paul Szelag, Ph.D., "
+        + "Director, Marine Mammal Research Program, Hawaii Institute of Marine "
+        + "Biology, University of Hawaii, P.O. Box 1106, Kailua, Hawaii 96734, "
+        + "has been issued a permit to conduct scientific research on three captive "
+        + "bottlenose dolphins (Tursiops truncatus) and one captive false killer whale "
+        + "(Pseudorca crassidens) for scientific research at the University of Hawaii.";
+
+    /**
+     * Loads a CRF classifier, runs it over {@link #SAMPLE_TEXT} and prints the
+     * tagged tokens to standard output.
+     * <p>
+     * The text to classify is not read from a file or from the command line;
+     * only the classifier path can be overridden.
+     *
+     * @param args optional; {@code args[0]} is the path of the serialized
+     *     classifier to load (defaults to {@link #DEFAULT_CLASSIFIER}); any
+     *     further arguments are ignored
+     * @throws IOException retained in the signature for compatibility with
+     *     existing callers
+     */
+    public static void main(String[] args) throws IOException {
+        // The first command-line argument, when present, overrides the default model path.
+        String serializedClassifier = args.length > 0 ? args[0] : DEFAULT_CLASSIFIER;
+
+        // Load the serialized CRF model from that path.
+        AbstractSequenceClassifier<CoreLabel> classifier =
+            CRFClassifier.getClassifierNoExceptions(serializedClassifier);
+
+        // Tag the sample text and print it sentence by sentence.
+        printTagged(classifier.classify(SAMPLE_TEXT));
+    }
+
+    /**
+     * Prints one line per sentence, writing each token as {@code word/label}
+     * followed by a space.
+     *
+     * @param sentences classifier output: a list of sentences, each a list of
+     *     labelled tokens
+     */
+    private static void printTagged(List<List<CoreLabel>> sentences) {
+        for (List<CoreLabel> sentence : sentences) {
+            for (CoreLabel token : sentence) {
+                // Every token, including the last one on a line, is followed by a space.
+                System.out.print(token.word() + '/' + token.get(AnswerAnnotation.class) + ' ');
+            }
+            // End the line for this sentence.
+            System.out.println();
+        }
+    }
+
+}