view src/de/mpiwg/anteater/xml/impl/LinnaeusParser.java @ 5:35d8c78ccd36

include Linnaeus findings
author jdamerow
date Thu, 25 Oct 2012 15:26:03 -0700
parents
children
line wrap: on
line source

package de.mpiwg.anteater.xml.impl;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.jdom2.Attribute;
import org.jdom2.DataConversionException;
import org.jdom2.Element;

import de.mpiwg.anteater.species.scientific.ScientificName;
import de.mpiwg.anteater.xml.ICommonNameFinderParser;

/**
 * Parser for XML documents whose root element is {@code linnaeus} and whose
 * {@code species} children carry the attributes {@code id}, {@code start},
 * {@code end} and {@code text}. Every such element is converted into a
 * {@link ScientificName}.
 */
public class LinnaeusParser extends JDOMParser implements ICommonNameFinderParser {

	private static final Logger LOGGER = Logger.getLogger(LinnaeusParser.class.getName());

	/**
	 * Creates a parser for XML given as a string.
	 *
	 * @param content the XML document; it is handed to the superclass together
	 *                with the flag {@code false} (NOTE(review): the meaning of
	 *                that flag is defined by JDOMParser, which is not visible
	 *                here)
	 */
	public LinnaeusParser(String content) {
		super(content, false);
	}

	/**
	 * Creates a parser for XML read from a stream.
	 *
	 * @param stream the stream that is handed to the superclass for parsing
	 */
	public LinnaeusParser(InputStream stream) {
		super(stream);
	}

	/**
	 * Collects one {@link ScientificName} per {@code /linnaeus/species} element.
	 * <p>
	 * The {@code id} attribute is stored both as NCBI id and as identified name,
	 * {@code text} both as scientific name and as reference in the text,
	 * {@code start} as start offset and {@code end - start} as length.
	 * <p>
	 * Parsing is best effort: an attribute that is missing or, for the offsets,
	 * not an integer is logged and the corresponding fields are simply left
	 * unset, so one malformed element does not abort the whole document.
	 *
	 * @return the names found, in the order returned by the XPath query
	 */
	@Override
	public List<ScientificName> parseSpeciesNames() {
		List<Element> names = executeXPath("/linnaeus/species", null);

		List<ScientificName> commonNames = new ArrayList<ScientificName>();
		for (Element name : names) {
			ScientificName commonName = new ScientificName();

			// getAttribute returns null for an absent attribute, hence the checks.
			Attribute id = name.getAttribute("id");
			if (id != null) {
				commonName.setNcbiId(id.getValue());
				commonName.setIdentifiedName(id.getValue());
			} else {
				LOGGER.warning("Linnaeus species element has no 'id' attribute");
			}

			// Parse each offset exactly once; the length needs both of them.
			Integer start = readIntAttribute(name, "start");
			Integer end = readIntAttribute(name, "end");
			if (start != null) {
				commonName.setStart(start.intValue());
				if (end != null) {
					commonName.setLength(end.intValue() - start.intValue());
				}
			}

			Attribute text = name.getAttribute("text");
			if (text != null) {
				commonName.setScientificName(text.getValue());
				commonName.setReferenceInText(text.getValue());
			} else {
				LOGGER.warning("Linnaeus species element has no 'text' attribute");
			}

			commonNames.add(commonName);
		}

		return commonNames;
	}

	/**
	 * Reads an attribute of the given element as an integer.
	 *
	 * @param element       the element to read from
	 * @param attributeName the name of the attribute
	 * @return the integer value, or {@code null} if the attribute is missing or
	 *         its value is not an integer (both cases are logged)
	 */
	private static Integer readIntAttribute(Element element, String attributeName) {
		Attribute attribute = element.getAttribute(attributeName);
		if (attribute == null) {
			LOGGER.warning("Linnaeus species element has no '" + attributeName + "' attribute");
			return null;
		}
		try {
			return Integer.valueOf(attribute.getIntValue());
		} catch (DataConversionException e) {
			LOGGER.log(Level.WARNING, "Attribute '" + attributeName
					+ "' of Linnaeus species element is not an integer: " + attribute.getValue(), e);
			return null;
		}
	}

}