view src/NERDemo.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line source

import java.io.IOException;
import java.util.List;

import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation;
import edu.stanford.nlp.ling.CoreLabel;



/** This is a demo of calling CRFClassifier programmatically.
 *  <p>
 *  Usage: <code> java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]]</code>
 *  <p>
 *  If arguments aren't specified, they default to
 *  classifiers/english.all.3class.distsim.crf.ser.gz and some hardcoded sample text.
 *  <p>
 *  To use CRFClassifier from the command line:
 *  java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
 *      [classifier] -textFile [file]
 *  Or if the file is already tokenized and one word per line, perhaps in
 *  a tab-separated value format with extra columns for part-of-speech tag,
 *  etc., use the version below (note the 's' instead of the 'x'):
 *  java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
 *      [classifier] -testFile [file]
 *
 *  @author Jenny Finkel
 *  @author Christopher Manning
 */

public class NERDemo {

    /** Path of the serialized classifier used when none is given on the command line. */
    private static final String DEFAULT_CLASSIFIER =
        "classifiers/english.all.3class.distsim.crf.ser.gz";

    /** Hardcoded sample passage that is classified when no input file is given. */
    private static final String SAMPLE_TEXT =
        "Notice is hereby given that Paul E. Nachtigall Paul Szelag, Ph.D., "
        + "Director, Marine Mammal Research Program, Hawaii Institute of Marine "
        + "Biology, University of Hawaii, P.O. Box 1106, Kailua, Hawaii 96734, "
        + "has been issued a permit to conduct scientific research on three captive "
        + "bottlenose dolphins (Tursiops truncatus) and one captive false killer whale "
        + "(Pseudorca crassidens) for scientific research at the University of Hawaii.";

    /**
     * Loads a CRF classifier, runs it over the input, and prints every token
     * as <code>word/label</code>, one sentence per output line.
     *
     * @param args optional arguments:
     *             <code>args[0]</code> is the path of the serialized classifier
     *             (defaults to {@link #DEFAULT_CLASSIFIER});
     *             <code>args[1]</code> is the name of a file to classify. When it
     *             is absent the hardcoded sample text is classified instead.
     * @throws IOException retained in the signature for compatibility with
     *                     existing callers; nothing in this method declares it.
     */
    public static void main(String[] args) throws IOException {
      String serializedClassifier = args.length > 0 ? args[0] : DEFAULT_CLASSIFIER;

      AbstractSequenceClassifier<CoreLabel> classifier =
          CRFClassifier.getClassifierNoExceptions(serializedClassifier);

      // Honour the documented [fileName] argument; previously it was silently
      // ignored and the sample text was always used.
      List<List<CoreLabel>> sentences = args.length > 1
          ? classifier.classifyFile(args[1])
          : classifier.classify(SAMPLE_TEXT);

      printTagged(sentences);
    }

    /** Prints each sentence on its own line as space-terminated word/label pairs. */
    private static void printTagged(List<List<CoreLabel>> sentences) {
      for (List<CoreLabel> sentence : sentences) {
        StringBuilder line = new StringBuilder();
        for (CoreLabel token : sentence) {
          line.append(token.word())
              .append('/')
              .append(token.get(AnswerAnnotation.class))
              .append(' ');
        }
        System.out.println(line);
      }
    }

}