diff src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut); alle has_readable_label Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents fb3f3df002df
children
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java	Wed Feb 09 16:36:36 2011 +0100
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java	Thu Jun 21 17:08:22 2012 +0200
@@ -2,19 +2,25 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
 import org.jdom.Attribute;
 import org.jdom.Document;
 import org.jdom.Element;
 import org.jdom.JDOMException;
+import org.jdom.Text;
 import org.jdom.input.SAXBuilder;
 import org.jdom.xpath.XPath;
 
 
+
 public class LexDumpImporter {
 
 	private Document doc;
+	private Logger logger;
 
 	public LexDumpImporter(String path) throws JDOMException, IOException{
 		
@@ -22,6 +28,7 @@
 		
 		doc = builder.build(new File(path));
 		
+		logger = Logger.getRootLogger();
 	}
 	
 	@SuppressWarnings("unchecked")
@@ -40,7 +47,18 @@
 		if (node==null){
 			return "";
 		} else if (Element.class.isInstance(node)){
-			return ((Element)node).getTextTrim();
+			List<String> retArray=new ArrayList<String>();
+			for (Object o: ((Element)node).getContent())
+			{
+				if(Element.class.isInstance(o)){
+					retArray.add(((Element)o).getTextTrim());
+				} else if(Text.class.isInstance(o)) {
+					retArray.add(((Text)o).getTextTrim());
+				}
+			}
+			Object[] X = retArray.toArray();
+			return StringUtils.join(X,' ');
+			//return ((Element)node).getTextTrim();
 		} else if (Attribute.class.isInstance(node)){
 			return ((Attribute)node).getValue();
 		}