Mercurial > hg > anteater
diff src/de/mpiwg/anteater/xml/impl/GNRDXMLParser.java @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line diff
package de.mpiwg.anteater.xml.impl;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.jdom2.Attribute;
import org.jdom2.DataConversionException;
import org.jdom2.Element;
import org.jdom2.Namespace;

import de.mpiwg.anteater.species.scientific.ScientificName;
import de.mpiwg.anteater.xml.INameFinderParser;

/**
 * {@link INameFinderParser} implementation that reads name-finder results from
 * an XML document via the XPath support inherited from {@link JDOMParser}.
 *
 * <p>The elements this class looks at are:
 * <ul>
 *   <li>{@code //status} and {@code //token_url} (first match only);</li>
 *   <li>{@code /result/names/name}, with the optional children
 *       {@code verbatim}, {@code identifiedName}, a namespaced
 *       {@code scientificName}, and {@code offset} (attributes {@code start}
 *       and {@code end}).</li>
 * </ul>
 */
public class GNRDXMLParser extends JDOMParser implements INameFinderParser {

    /** Namespace of the {@code scientificName} child of a {@code name} element. */
    private static final Namespace DWC_TERMS_NAMESPACE =
            Namespace.getNamespace("http://rs.tdwg.org/dwc/terms/");

    /**
     * Creates a parser for XML supplied as a string.
     *
     * @param content the XML document text; forwarded to
     *                {@code JDOMParser(String, boolean)} with the flag set to
     *                {@code false} (the flag's meaning is defined by
     *                {@code JDOMParser}, which is not visible from this file)
     */
    public GNRDXMLParser(String content) {
        super(content, false);
    }

    /**
     * Creates a parser for XML supplied as a stream.
     *
     * @param stream the stream handed to {@code JDOMParser(InputStream)}
     */
    public GNRDXMLParser(InputStream stream) {
        super(stream);
    }

    /**
     * Returns the text of the first {@code status} element in the document.
     *
     * @return the value produced by {@code stripText} for that element, or
     *         {@code null} if the document has no {@code status} element
     * @see de.mpiwg.anteater.xml.INameFinderParser#getStatus()
     */
    @Override
    public String getStatus() {
        return firstElementText("//status");
    }

    /**
     * Returns the text of the first {@code token_url} element in the document.
     *
     * @return the value produced by {@code stripText} for that element, or
     *         {@code null} if the document has no {@code token_url} element
     * @see de.mpiwg.anteater.xml.INameFinderParser#getTokenURL()
     */
    @Override
    public String getTokenURL() {
        return firstElementText("//token_url");
    }

    /**
     * Builds one {@link ScientificName} per {@code /result/names/name} element.
     *
     * <p>Every child element is optional: a missing child simply leaves the
     * corresponding property of the {@link ScientificName} unset. In
     * particular a {@code name} without a {@code verbatim} child no longer
     * aborts the whole parse with a {@link NullPointerException}.
     *
     * @return the parsed names in document order; empty (never {@code null})
     *         when no {@code name} element matches
     */
    @Override
    public List<ScientificName> parseScientificNames() {
        List<Element> nameElements = executeXPath("/result/names/name", null);

        List<ScientificName> scientificNames = new ArrayList<ScientificName>();
        for (Element nameElement : nameElements) {
            ScientificName parsedName = new ScientificName();

            Element verbatimNode = nameElement.getChild("verbatim");
            if (verbatimNode != null) {
                parsedName.setReferenceInText(verbatimNode.getText());
            }

            Element identifiedNode = nameElement.getChild("identifiedName");
            if (identifiedNode != null) {
                parsedName.setIdentifiedName(identifiedNode.getText());
            }

            Element scientificNode = nameElement.getChild("scientificName", DWC_TERMS_NAMESPACE);
            if (scientificNode != null) {
                parsedName.setScientificName(scientificNode.getText());
            }

            Element offsetNode = nameElement.getChild("offset");
            if (offsetNode != null) {
                applyOffset(offsetNode, parsedName);
            }

            scientificNames.add(parsedName);
        }

        return scientificNames;
    }

    /**
     * Runs an XPath query and returns the text of the first matching element.
     *
     * @param xpath the XPath expression to evaluate
     * @return the stripped text of the first match, or {@code null} if the
     *         query matches nothing
     */
    private String firstElementText(String xpath) {
        List<Element> matches = executeXPath(xpath, null);
        if (matches.isEmpty()) {
            return null;
        }
        return stripText(matches.get(0));
    }

    /**
     * Copies the position information of an {@code offset} element onto a name.
     *
     * <p>The start is only applied when the {@code start} attribute is present,
     * numeric and non-negative. The length is derived from the {@code end}
     * attribute as {@code end - start + 1}, and is only set when a start was
     * applied and {@code end} is present and numeric.
     *
     * @param offsetNode the {@code offset} element to read
     * @param target     the name that receives start and length
     */
    private static void applyOffset(Element offsetNode, ScientificName target) {
        Integer start = readIntAttribute(offsetNode, "start");
        if (start == null || start.intValue() < 0) {
            return;
        }
        target.setStart(start.intValue());

        Integer end = readIntAttribute(offsetNode, "end");
        if (end != null) {
            target.setLength(end.intValue() - start.intValue() + 1);
        }
    }

    /**
     * Reads an attribute as an integer.
     *
     * @param element       the element carrying the attribute
     * @param attributeName the name of the attribute to read
     * @return the attribute's integer value, or {@code null} if the attribute
     *         is absent or its value is not a valid integer
     */
    private static Integer readIntAttribute(Element element, String attributeName) {
        Attribute attribute = element.getAttribute(attributeName);
        if (attribute == null) {
            return null;
        }
        try {
            return Integer.valueOf(attribute.getIntValue());
        } catch (DataConversionException e) {
            // Best effort: a malformed offset must not discard the name itself,
            // so the problem is reported and the attribute is treated as absent.
            e.printStackTrace();
            return null;
        }
    }
}