diff src/de/mpiwg/anteater/xml/impl/GNRDXMLParser.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/anteater/xml/impl/GNRDXMLParser.java	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,103 @@
+package de.mpiwg.anteater.xml.impl;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jdom2.Attribute;
+import org.jdom2.DataConversionException;
+import org.jdom2.Element;
+import org.jdom2.Namespace;
+
+import de.mpiwg.anteater.species.scientific.ScientificName;
+import de.mpiwg.anteater.xml.INameFinderParser;
+
+/**
+ * {@link INameFinderParser} implementation on top of {@link JDOMParser}. It
+ * reads the {@code status} and {@code token_url} elements and the
+ * {@code /result/names/name} entries of the parsed document.
+ */
+public class GNRDXMLParser extends JDOMParser implements INameFinderParser {
+
+	/** Namespace URI under which the {@code scientificName} child is looked up. */
+	private static final String DWC_TERMS_URI = "http://rs.tdwg.org/dwc/terms/";
+
+	public GNRDXMLParser(String content) {
+		super(content, false);
+	}
+
+	public GNRDXMLParser(InputStream stream) {
+		super(stream);
+	}
+
+	/**
+	 * @return text of the first {@code status} element, or {@code null} if none exists
+	 * @see de.mpiwg.anteater.xml.INameFinderParser#getStatus()
+	 */
+	@Override
+	public String getStatus() {
+		return firstElementText("//status");
+	}
+
+	/**
+	 * @return text of the first {@code token_url} element, or {@code null} if none exists
+	 * @see de.mpiwg.anteater.xml.INameFinderParser#getTokenURL()
+	 */
+	@Override
+	public String getTokenURL() {
+		return firstElementText("//token_url");
+	}
+
+	/**
+	 * Builds one {@link ScientificName} per {@code /result/names/name} element.
+	 * Child elements that are absent leave the corresponding property unset.
+	 *
+	 * @return the parsed names in the order the XPath query returned them;
+	 *         empty if no element matches
+	 */
+	@Override
+	public List<ScientificName> parseScientificNames() {
+		// Looked up once; the namespace is the same for every name element.
+		Namespace dwcTerms = Namespace.getNamespace(DWC_TERMS_URI);
+		List<ScientificName> scientificNames = new ArrayList<ScientificName>();
+		for (Element name : executeXPath("/result/names/name", null)) {
+			ScientificName scienName = new ScientificName();
+
+			// Null-checked like its siblings so a missing element cannot abort the parse.
+			Element verbNode = name.getChild("verbatim");
+			if (verbNode != null) {
+				scienName.setReferenceInText(verbNode.getText());
+			}
+			Element identNode = name.getChild("identifiedName");
+			if (identNode != null) {
+				scienName.setIdentifiedName(identNode.getText());
+			}
+			Element scientNode = name.getChild("scientificName", dwcTerms);
+			if (scientNode != null) {
+				scienName.setScientificName(scientNode.getText());
+			}
+			Element offsetNode = name.getChild("offset");
+			if (offsetNode != null) {
+				applyOffset(scienName, offsetNode);
+			}
+			scientificNames.add(scienName);
+		}
+		return scientificNames;
+	}
+
+	/** Returns the text of the first element matching {@code xpath}, or {@code null} if none matches. */
+	private String firstElementText(String xpath) {
+		List<Element> results = executeXPath(xpath, null);
+		return results.isEmpty() ? null : stripText(results.get(0));
+	}
+
+	/**
+	 * Sets the start from the {@code start} attribute and the length from
+	 * {@code end - start + 1}. Nothing is set unless {@code start} is a
+	 * non-negative integer; the length is only set if {@code end} is an integer.
+	 */
+	private static void applyOffset(ScientificName scienName, Element offsetNode) {
+		Integer parsedStart = intAttribute(offsetNode, "start");
+		if (parsedStart == null || parsedStart < 0) {
+			return;
+		}
+		int start = parsedStart;
+		scienName.setStart(start);
+
+		Integer parsedEnd = intAttribute(offsetNode, "end");
+		if (parsedEnd != null) {
+			scienName.setLength(parsedEnd - start + 1);
+		}
+	}
+
+	/** Returns the named attribute as an integer, or {@code null} if it is absent or not an integer. */
+	private static Integer intAttribute(Element element, String attributeName) {
+		Attribute attribute = element.getAttribute(attributeName);
+		if (attribute == null) {
+			return null;
+		}
+		try {
+			return attribute.getIntValue();
+		} catch (DataConversionException e) {
+			// Best effort: a malformed offset is reported and then treated as absent.
+			e.printStackTrace();
+			return null;
+		}
+	}
+}