package oboannotator.annotator; import java.util.regex.Matcher; import java.util.regex.Pattern; import oboannotator.typesys.MedlineDocument; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.jcas.JCas; /** * Example annotator that detects room numbers using Java 1.4 regular expressions. */ public class MedlineDocumentAnnotator extends JCasAnnotator_ImplBase { //(^UI|PMID)\\s*\\-(.*?)^([A-Z][A-Z]+\\s*\\-) //private Pattern docPattern = Pattern.compile("(^UI|PMID)[\\s]*\\-[\\s]+(.*?)([\n]{2})", Pattern.DOTALL|Pattern.MULTILINE); private Pattern docPattern = Pattern.compile("(^UI|PMID)[\\s]*\\-[\\s]+(.*?)([\n]{2})", Pattern.DOTALL|Pattern.MULTILINE); /** * @see JCasAnnotator_ImplBase#process(JCas) */ public void process(JCas aJCas) { // get document text String docText = aJCas.getDocumentText(); // search for Yorktown room numbers Matcher matcher = docPattern.matcher(docText); while (matcher.find()) { // found one - create annotation MedlineDocument annotation = new MedlineDocument(aJCas); annotation.setBegin(matcher.start()); annotation.setEnd(matcher.end()); annotation.setDocID(matcher.group(2)); annotation.setText(matcher.group(1) + " - " + matcher.group(2)); annotation.addToIndexes(); } } }