package oboannotator.annotator;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import oboannotator.typesys.MedlineField;

import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.jcas.JCas;

/**
 * Annotator that creates {@link MedlineField} annotations over the document
 * text using regular expressions.
 *
 * <p>Two kinds of spans are annotated:
 * <ul>
 *   <li>identifiers of the form {@code PREFIX_digits} (upper-case prefix,
 *       underscore, digits): the prefix becomes the annotation name and the
 *       digits become its text;</li>
 *   <li>tagged fields that start a line with {@code UI}, {@code PMID},
 *       {@code AB} or {@code TI} followed by a hyphen: the tag becomes the
 *       annotation name and everything up to the next line-initial tag (or the
 *       end of the document) becomes its text.</li>
 * </ul>
 */
public class MedlineFieldAnnotator extends JCasAnnotator_ImplBase {

    /**
     * Upper-case prefix, underscore, digits. The prefix group is reluctant and
     * may be empty, so a bare {@code _123} also matches with an empty name.
     */
    private static final Pattern ONTOLOGY_PATTERN = Pattern.compile("([A-Z]*?)_(\\d+)");

    private static final Pattern UI_PATTERN = fieldPattern("UI|PMID");

    private static final Pattern AB_PATTERN = fieldPattern("AB");

    private static final Pattern TI_PATTERN = fieldPattern("TI");

    /**
     * Builds the pattern for one tagged field.
     *
     * <p>Group 1 is the tag and group 2 is the field value. The value ends at
     * the next line that starts with a tag (two or more upper-case letters,
     * optional whitespace, hyphen) or at the end of the input. The terminator
     * is a look-ahead so that it is not consumed: the match does not run into
     * the following field, an immediately following field can still be matched,
     * and the last field of a document is found as well.
     *
     * @param tags alternation of tag names, e.g. {@code "UI|PMID"}
     * @return the compiled pattern (DOTALL and MULTILINE)
     */
    private static Pattern fieldPattern(String tags) {
        // The anchor sits outside the group so that it applies to every alternative.
        return Pattern.compile(
                "^(" + tags + ")\\s*-(.*?)(?=^[A-Z][A-Z]+\\s*-|\\z)",
                Pattern.DOTALL | Pattern.MULTILINE);
    }

    /**
     * Scans the document text and adds a {@link MedlineField} annotation to
     * the indexes for every ontology identifier and every UI/PMID, AB and TI
     * field found. Does nothing when the CAS has no document text.
     *
     * @param aJCas the CAS whose document text is analysed and to which the
     *              annotations are added
     * @see JCasAnnotator_ImplBase#process(JCas)
     */
    @Override
    public void process(JCas aJCas) {
        String docText = aJCas.getDocumentText();
        if (docText == null) {
            // No text set on this CAS; Pattern.matcher(null) would throw.
            return;
        }

        annotateMatches(aJCas, ONTOLOGY_PATTERN, docText);
        annotateMatches(aJCas, UI_PATTERN, docText);
        annotateMatches(aJCas, AB_PATTERN, docText);
        annotateMatches(aJCas, TI_PATTERN, docText);
    }

    /**
     * Adds one annotation per match of {@code pattern} in {@code docText}.
     * The annotation spans the whole match; group 1 is stored as the name and
     * group 2 as the text.
     */
    private static void annotateMatches(JCas aJCas, Pattern pattern, String docText) {
        Matcher matcher = pattern.matcher(docText);
        while (matcher.find()) {
            MedlineField annotation = new MedlineField(aJCas);
            annotation.setBegin(matcher.start());
            annotation.setEnd(matcher.end());
            annotation.setName(matcher.group(1));
            annotation.setText(matcher.group(2));
            annotation.addToIndexes();
        }
    }
}