changeset 8:919e9f3b5efd

New classes for text analysis (Stanford parser integrated); replaced all has_readable_* datatype properties with rdfs:label.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents 19e40abb3e8a
children 4392a6adf85a
files .classpath libs/commons-lang-2.0.jar protege.properties src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java src/de/mpiwg/dwinter/duomo/stanford/Analyse.java src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java src/de/mpiwg/dwinter/duomo/stanford/ParserDemo.java src/de/mpiwg/dwinter/duomo/stanford/ParserDemo2.java src/de/mpiwg/dwinter/duomo/stanford/TokenWithEvent.java
diffstat 11 files changed, 1016 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/.classpath	Wed Feb 09 16:36:36 2011 +0100
+++ b/.classpath	Thu Jun 21 17:08:22 2012 +0200
@@ -7,5 +7,12 @@
 	<classpathentry kind="lib" path="libs/jaxen-1.1.1.jar"/>
 	<classpathentry kind="lib" path="libs/jdom-1.0.jar"/>
 	<classpathentry kind="lib" path="libs/log4j-1.2.15.jar"/>
+	<classpathentry kind="lib" path="libs/commons-lang-2.0.jar"/>
+	<classpathentry kind="lib" path="/Users/dwinter/Documents/Projekte/Diss - data-mining/stanford-parser/stanford-parser-2012-03-09/stanford-parser-2012-03-09-models.jar"/>
+	<classpathentry kind="lib" path="/Users/dwinter/Documents/Projekte/Diss - data-mining/stanford-parser/stanford-parser-2012-03-09/stanford-parser.jar" sourcepath="/Users/dwinter/Documents/Projekte/Diss - data-mining/stanford-parser/stanford-parser-2012-03-09/stanford-parser-2012-03-09-sources.jar">
+		<attributes>
+			<attribute name="javadoc_location" value="jar:file:/Users/dwinter/Documents/Projekte/Diss - data-mining/stanford-parser/stanford-parser-2012-03-09/stanford-parser-2012-03-09-javadoc.jar!/"/>
+		</attributes>
+	</classpathentry>
 	<classpathentry kind="output" path="bin"/>
 </classpath>
Binary file libs/commons-lang-2.0.jar has changed
--- a/protege.properties	Wed Feb 09 16:36:36 2011 +0100
+++ b/protege.properties	Thu Jun 21 17:08:22 2012 +0200
@@ -1,5 +1,5 @@
 #Protege Properties
-#Wed Feb 09 16:23:54 CET 2011
+#Thu Jun 21 17:04:13 CEST 2012
 SwitchableClassDefinitionType=edu.stanford.smi.protegex.owl.ui.cls.LogicClassDefinitionWidgetType
 history.projects.reopen=file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/newspaper/newspaper.pprj,file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/sqwrl/SQWRLExamples.pprj,file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/pizza/pizza.owl.pprj
 OntURIBase=http\://www.owl-ontologies.com
--- a/src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java	Wed Feb 09 16:36:36 2011 +0100
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/LexDumpImporter.java	Thu Jun 21 17:08:22 2012 +0200
@@ -2,19 +2,25 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
 import org.jdom.Attribute;
 import org.jdom.Document;
 import org.jdom.Element;
 import org.jdom.JDOMException;
+import org.jdom.Text;
 import org.jdom.input.SAXBuilder;
 import org.jdom.xpath.XPath;
 
 
+
 public class LexDumpImporter {
 
 	private Document doc;
+	private Logger logger;
 
 	public LexDumpImporter(String path) throws JDOMException, IOException{
 		
@@ -22,6 +28,7 @@
 		
 		doc = builder.build(new File(path));
 		
+		logger = Logger.getRootLogger();
 	}
 	
 	@SuppressWarnings("unchecked")
@@ -40,7 +47,18 @@
 		if (node==null){
 			return "";
 		} else if (Element.class.isInstance(node)){
-			return ((Element)node).getTextTrim();
+			List<String> retArray=new ArrayList<String>();
+			for (Object o: ((Element)node).getContent())
+			{
+				if(Element.class.isInstance(o)){
+					retArray.add(((Element)o).getTextTrim());
+				} else if(Text.class.isInstance(o)) {
+					retArray.add(((Text)o).getTextTrim());
+				}
+			}
+			Object[] parts = retArray.toArray();
+			return StringUtils.join(parts, ' ');
+			//return ((Element)node).getTextTrim();
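+			// Illustrative example (the element names and values are assumptions based on the lex dump markup used elsewhere):
+			// for <name>Antonio <provenance>da Firenze</provenance></name> this now returns "Antonio da Firenze",
+			// whereas getTextTrim() alone would drop the text of the nested <provenance> element.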
 		} else if (Attribute.class.isInstance(node)){
 			return ((Attribute)node).getValue();
 		}
--- a/src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java	Wed Feb 09 16:36:36 2011 +0100
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java	Thu Jun 21 17:08:22 2012 +0200
@@ -26,7 +26,9 @@
 import com.sun.xml.internal.ws.developer.MemberSubmissionEndpointReference.Elements;
 
 import edu.stanford.smi.protege.exception.OntologyLoadException;
+import edu.stanford.smi.protege.model.Facet;
 import edu.stanford.smi.protege.model.Instance;
+import edu.stanford.smi.protege.model.Slot;
 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
 import edu.stanford.smi.protegex.owl.model.OWLAllValuesFrom;
 import edu.stanford.smi.protegex.owl.model.OWLClass;
@@ -38,9 +40,17 @@
 import edu.stanford.smi.protegex.owl.model.RDFList;
 import edu.stanford.smi.protegex.owl.model.RDFProperty;
 import edu.stanford.smi.protegex.owl.model.RDFResource;
+import edu.stanford.smi.protegex.owl.model.RDFSClass;
 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLQuantifierRestriction;
+import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLRestriction;
+import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLAllValuesFrom;
 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLUnionClass;
 
+/**
+ * @author dwinter
+ *
+ * Transforms the lex files into OWL.
+ */
 public class LexOWLTransformer {
 	private Logger logger = Logger.getRootLogger();
 	private FileWriter missing;
@@ -106,9 +116,14 @@
 
 		LexOWLTransformer tf = new LexOWLTransformer(owlDoc, lexDoc);
 		tf.transform();
-		owlDoc.save("file:///tmp/out.owl");
+		//owlDoc.save("file:///tmp/out.owl");
 	}
 
+	/**
+	 * Main method for the transformation.
+	 * @throws URISyntaxException
+	 * @throws Exception
+	 */
 	private void transform() throws URISyntaxException, Exception {
 		List<Element> signatures = lexDoc.getSignatures();
 
@@ -169,9 +184,9 @@
 		// timespan
 
 		try {
-			String dateDcStart = lexDoc.getValue(record, ".//datdf/startdate");
+			String dateDcStart = lexDoc.getValue(record, ".//datrf/startdate");
 
-			String dateDcEnd = lexDoc.getValue(record, ".//datdf/startdate");
+			String dateDcEnd = lexDoc.getValue(record, ".//datrf/enddate");
 
 			if (!dateDcStart.equals("")) {
 				OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart,
@@ -217,12 +232,19 @@
 				String name = lexDoc.getValue(nomiq, "./name");
 				String role = lexDoc.getValue(nomiq, "./role");
 
+				String provenance = lexDoc.getValue(nomiq, "./name/provenance");
+				
+					
+					
 				if (!name.equals("") && !role.equals("")) {
 					recordNamesRoles = handleNameWithRole(recordInd, name, role);
 				} else if (!role.equals("")) {
 					recordNamesRoles = createOrGetRole(role);
 				} else if (!name.equals("")) {
 					recordNamesRoles = createOrGetName(name);
+					if (!provenance.equals("")){
+						owlDoc.setDataTypePropery(recordNamesRoles, "has_provenance_as_string", provenance, "it");
+					}
 				}
 
 				if (recordNamesRoles != null) {
@@ -274,21 +296,23 @@
 	}
 
 	private void createType(OWLIndividual eventInstance, Element type) {
-
+		
+		
 		String typeId;
 		try {
 			typeId = lexDoc.getValue(type, "./ptr/@target");
 			String clsName = owlDoc.getClassNameFromTypeId(typeId);
-			OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId);
 			OWLIndividual typeInd = owlDoc.createInstance(clsName);
 
-			OWLNamedClass subjectClass = getPreferredTargetClass(cls,
-					"has_subject");
-			OWLNamedClass predicateClass = getPreferredTargetClass(cls,
-					"has_predicate");
-
+			owlDoc.setProperty(eventInstance, "has_topic", typeInd);	
 			List<Element> freeTexts = XPath.selectNodes(type, "./freetext");
 			for (Element freeText : freeTexts) {
+				OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId);
+				
+				OWLNamedClass subjectClass = getPreferredTargetClass(cls,
+						"has_subject");
+				OWLNamedClass predicateClass = getPreferredTargetClass(cls,
+						"has_predicate");
 
 				String subjPointer = lexDoc.getValue(freeText,
 						"./sub/ptrtoperson/@target");
@@ -296,6 +320,55 @@
 
 				OWLIndividual subjInd = createSubjectOrPredicate(subjectClass,
 						subjPointer, subjText);
+				
+				
+				// check whether a subproperty of materialInvolved exists for the class (clsName) that belongs to this type (type),
+				// and if so which one. TODO: currently the corresponding material is then created from the string "subjText".
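+				// Hypothetical example of the pattern this loop expects (the property and class names are made up):
+				//   involvesWood  rdfs:subPropertyOf  materialInvolved ;  rdfs:domain  Purchase ;
+				//   Purchase has an owl:allValuesFrom restriction on involvesWood pointing to Wood.
+				// For an event type that is a subclass of Purchase and a subjText like "abete", an individual of Wood
+				// identified by "abete" is then created (or reused) and linked to typeInd via involvesWood.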
+				
+				
+				//Collection<RDFProperty> props = cls.getPossibleRDFProperties();
+						
+						
+				
+				RDFProperty superproperty= owlDoc.owlModel.getRDFProperty("http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/materialInvolved");
+				
+				Collection<RDFSClass> sc = cls.getSuperclasses(true);
+				
+				OWLNamedClass mat = owlDoc.owlModel.getOWLNamedClass("http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/DuomoActivities");
+				
+				if (sc.contains(mat)){ // cls is subclass of DuomoActivities
+					Collection<RDFProperty> props = superproperty.getSubproperties(false);
+					
+					// now find out which of these subproperties the class cls belongs to
+					
+					// to do this, go through all subproperties of materialInvolved
+					for (RDFProperty prop:props){
+						
+						@SuppressWarnings("unchecked")
+						Collection<RDFSClass> domains = prop.getDomains(true);
+								
+						for(RDFSClass domain: domains){
+						//if (domain.getName().equals(cls.getName()))
+							
+						// now check whether cls is a subclass of this property's domain
+						if (cls.isSubclassOf(domain)) // cls is in the domain of the property
+								{
+							
+							// the property must take exactly one value from a fixed class; find that class and then create an individual of it
+							Collection<AbstractOWLRestriction> restrictions = cls.getRestrictions(prop, false); // find the restriction and then create an object of that type
+							for (AbstractOWLRestriction restriction: restrictions){
+								if (DefaultOWLAllValuesFrom.class.isInstance(restriction)){
+									DefaultOWLAllValuesFrom rest = (DefaultOWLAllValuesFrom)restriction;
+									RDFResource restClass = rest.getAllValuesFrom();
+									OWLIndividual inst = owlDoc.createOrGetInstanceWithIdentifier(restClass.getLocalName(), "Identifier", subjText, false);
+									owlDoc.setProperty(typeInd, prop.getLocalName(), inst);
+							//materialInd = owlDoc.createInstance(res.getName());
+							}
+							}
+						}
+						}
+					}
+				}
 
 				String predPointer = lexDoc.getValue(freeText,
 						"./pred/ptrtoperson/@target");
@@ -328,7 +401,8 @@
 		OWLIndividual subjInd = null;
 
 		if (!subjPointer.equals("")) {
-			subjInd = toClass.createOWLIndividual(null);
+			subjInd = owlDoc.createInstance(toClass.getName());
+			//subjInd = toClass.createOWLIndividual(null);
 			OWLIndividual ind = individualIds.get(subjPointer);
 			if (ind == null) {
 				logger.debug("target ID does not exist:" + subjPointer);
@@ -344,20 +418,21 @@
 			}
 		}
 
-		if (!subjText.equals("")) {
+		if (!subjText.equals("") && !subjText.equals(" ")) {
 			if (subjInd == null)
-				subjInd = toClass.createOWLIndividual(null);
+				subjInd = owlDoc.createInstance(toClass.getName());
+				//subjInd = toClass.createOWLIndividual(null);
 
 			OWLNamedClass idcls = owlDoc.owlModel
 					.getOWLNamedClass("Identifier"); // is die klasse selbst
 														// schon ein identifiert
 			if (toClass.getNamedSuperclasses(true).contains(idcls)) { // to
-				owlDoc.setProperty(subjInd, "has_readable_id", subjText);
+				owlDoc.setProperty(subjInd, "rdfs:label", subjText);
 			} else {
 
 				OWLIndividual ident = owlDoc
 						.createInstance("IdentifierPredicateOrSubject");
-				owlDoc.setProperty(ident, "has_readable_id", subjText);
+				owlDoc.setProperty(ident, "rdfs:label", subjText);
 				owlDoc.setProperty(subjInd, "crm:P48_has_preferred_identifier",
 						ident);
 			}
@@ -513,7 +588,7 @@
 		OWLIndividual recordInstance = owlDoc.createInstance("Record");
 		owlDoc.setProperty(recordInstance, "is_on_card", cardInd);
 		createNewDependingInstanceFromXpath(record, recordInstance, "./@id",
-				new String[] { "has_readable_id", "rdfs:label" },
+				new String[] {  "rdfs:label" },
 				"IdentifierCurrent", "crm:P48_has_preferred_identifier");
 
 		String value = lexDoc.getValue(record, ".//textblockid");
@@ -521,14 +596,18 @@
 			owlDoc.setProperty(recordInstance, "has_textblockid", value);
 
 		String endOnCarta = lexDoc.getValue(record, "./@end_on_carta");
+		
+		// FIXME: addRecordToCarta is buggy, see there! In addition it is not taken into account that more than one carta
+		// can lie between card and end_on_carta; currently only the carta described in end_on_carta is added to the
+		// record via is_on_card.
 		if (!endOnCarta.equals("")) {
 			OWLIndividual signature = (OWLIndividual) owlDoc
 					.getRelatedIndividual(cardInd, "has_signature");
-			addRecordToCarta(recordInstance, value, signature);
+			addRecordToCarta(recordInstance, endOnCarta, signature);
 		}
 
 		String dateDcStart = lexDoc.getValue(record, ".//datdc/startdate");
-		String dateDcEnd = lexDoc.getValue(record, ".//datdc/startdate");
+		String dateDcEnd = lexDoc.getValue(record, ".//datdc/enddate");
 
 		OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, dateDcEnd);
 
@@ -540,6 +619,8 @@
 	private void addRecordToCarta(OWLIndividual recordInstance, String cardID,
 			OWLIndividual signature) {
 
+		// FIXME: cardID is only unique within a signature, i.e. the cardID that lives in the same
+		// signature as "signature" has to be found.
 		OWLIndividual card = owlDoc.getIndividualByReadableId("Card", cardID);
 
 		if (card == null) {
@@ -567,7 +648,6 @@
 		owlDoc.setProperty(cardInstance, "has_signature", signature);
 
 		OWLIndividual preferredId = owlDoc.createInstance("IdentifierCurrent");
-		owlDoc.setProperty(preferredId, "has_readable_id", cardId);
 		owlDoc.setProperty(preferredId, "rdfs:label", cardId);
 
 		owlDoc.setProperty(cardInstance, "crm:P48_has_preferred_identifier",
@@ -583,13 +663,11 @@
 		try {
 
 			createNewDependingInstanceFromXpath(card, cardInstance,
-					".//cartanr", new String[] { "has_readable_id",
-							"rdfs:label" }, "IdentifierCurrent",
+					".//cartanr", new String[] { "rdfs:label" }, "IdentifierCurrent",
 					"crm:P48_has_preferred_identifier");
 
 			createNewDependingInstanceFromXpath(card, cardInstance,
-					".//cartaant", new String[] { "has_readable_id",
-							"rdfs:label" }, "IdentifierCurrent",
+					".//cartaant", new String[] { "rdfs:label" }, "IdentifierCurrent",
 					"crm:P1_is_identified_by");
 
 			owlDoc.setProperty(cardInstance, "has_signature", signatureInd);
--- a/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java	Wed Feb 09 16:36:36 2011 +0100
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java	Thu Jun 21 17:08:22 2012 +0200
@@ -1,8 +1,10 @@
 package de.mpiwg.dwinter.duomo.lexdump;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -11,6 +13,8 @@
 import java.util.Iterator;
 import java.util.List;
 
+import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction;
+
 import org.apache.log4j.Logger;
 
 import edu.stanford.smi.protege.exception.OntologyLoadException;
@@ -24,6 +28,19 @@
 import edu.stanford.smi.protegex.owl.repository.RepositoryManager;
 import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository;
 
+/**
+ * Creates and manages entities according to an OWL model.
+ * All created entities are also written directly as N-Triples to a file.
+ * 
+ * TODO: this file is currently always /tmp/out.rdf
+ * TODO: for the output there are ready-made writers, @see{org.openrdf.rio.trig.TriGWriter} and @see{de.mpiwg.itgroup.triplestoremanager.tools.Exporter},
+ * which should be used instead of the hand-rolled code that writes directly to outRDF.
+ * 
+ * TODO: individual methods here are still DUOMO- and CRM-specific, in particular
+ * @author dwinter
+ *
+ *
+ */
 public class OWLImporter {
 
 	JenaOWLModel owlModel; // contains the model
@@ -33,7 +50,16 @@
 	private HashMap<String, OWLNamedClass> typeId2class=null;
 	private String ontFolder;
 	private URI uri;
+	//private FileWriter outRDF;
+	private OutputStreamWriter outRDF;
 
+	/**
+	 * Initialises the class and
+	 * loads the ontologies.
+	 * @param folder folder containing the ontology to be processed
+	 * @param uri URI of the ontology itself
+	 * @throws OntologyLoadException
+	 */
 	public OWLImporter(String folder, URI uri) throws OntologyLoadException {
 		// owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri);
 
@@ -41,6 +67,7 @@
 		this.uri=uri;
 		try {
 			this.fh= new FileWriter(new File("/tmp/identifier"));
+			this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8");
 		} catch (IOException e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();
@@ -48,10 +75,21 @@
 		loadOWL(folder, uri);
 	}
 
+	/**
+	 * Reloads the ontology.
+	 * @throws OntologyLoadException
+	 */
 	public void reloadOWL() throws OntologyLoadException{
 		loadOWL(ontFolder, uri);
 	}
 	
+	/**
+	 * 
+	 * Loads the ontologies.
+	 * @param folder folder containing the ontology to be processed
+	 * @param uri URI of the ontology itself
+	 * @throws OntologyLoadException
+	 */
 	public void loadOWL(String folder, URI uri) throws OntologyLoadException {
 		owlModel = ProtegeOWL.createJenaOWLModel();
 		// Load repository
@@ -84,9 +122,9 @@
 	}
 
 	public static void main(String args[]) throws URISyntaxException {
-		String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput";
+		String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version";
 		URI ontologieUri = new URI(
-				"file:///Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput/duomoAnalysis.owl");
+				"file:///Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version/protege_version_duomo/duomoAnalysis.owl");
 		try {
 
 			OWLImporter im = new OWLImporter(base, ontologieUri);
@@ -97,27 +135,81 @@
 		}
 	}
 
-	public OWLIndividual createInstance(String string) {
+	/** Creates an instance and writes the corresponding <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> relation to the export file.
+	 * @param fullClassName name of the class
+	 * @return null if the object cannot be created.
+	 */
+	public OWLIndividual createInstance(String fullClassName) {
 		OWLNamedClass owlclass = (OWLNamedClass) owlModel
-				.getOWLNamedClass(string);
+				.getOWLNamedClass(fullClassName);
 
 		if (owlclass == null) {
-			logger.debug("Cannot find OWLClass:" + string);
+			logger.debug("Cannot find OWLClass:" + fullClassName);
 			return null;
 		}
 		//logger.debug("Create new individual of type:"+string);
-		return owlclass.createOWLIndividual(null);
+		
+		
+		OWLIndividual ind = owlclass.createOWLIndividual(null);
+		
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String triple = String.format("<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <%s>.\n", ind.getName(),owlclass.getName());
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+		return ind;
 
 	}
 
-	public void setProperty(OWLIndividual identifier, String propertyName,
+	
+	/**
+	 * Sets a property on an individual and writes the corresponding information to the out file.
+	 * @param individual 
+	 * @param propertyName
+	 * @param value
+	 */
+	public void setProperty(OWLIndividual individual, String propertyName,
 			Object value) {
 		RDFProperty prop = owlModel.getRDFProperty(propertyName);
-		identifier.setPropertyValue(prop, value);
-
+		individual.setPropertyValue(prop, value);
+		
+		
+		
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String valName="";
+		
+		
+		if (OWLIndividual.class.isInstance(value))
+			valName="<"+((OWLIndividual)value).getName()+">";
+		else
+			valName="\""+escapeRDFLit((String)value)+"\"";
+		
+		
+		String triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName);
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
 	}
 	
-	public void setDataTypePropery(OWLIndividual eventInstance,
+	private String escapeRDFLit(String string){
+		return string.replace("\"", "");
+	}
+	
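+	// Rough sketch of the TODO above (an assumption, commented out since it is not wired in): the hand-rolled
+	// triple strings could be produced through the Sesame Rio writer instead, roughly like this:
+	//
+	//	org.openrdf.model.ValueFactory vf = org.openrdf.model.impl.ValueFactoryImpl.getInstance();
+	//	org.openrdf.rio.RDFWriter writer = new org.openrdf.rio.trig.TriGWriter(outRDF);
+	//	writer.startRDF();
+	//	writer.handleStatement(vf.createStatement(
+	//			vf.createURI(individual.getName()),   // assumes getName() yields a full URI
+	//			vf.createURI(prop.getName()),
+	//			vf.createLiteral(value.toString())));
+	//	writer.endRDF();
+	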
+	/** Sets a datatype property and writes the corresponding information to the out file.
+	 * @param individual
+	 * @param propertyName
+	 * @param value
+	 * @param lang language tag
+	 */
+	public void setDataTypePropery(OWLIndividual individual,
 			String propertyName, String value, String lang) {
 		RDFProperty prop = owlModel.getRDFProperty(propertyName);
 //		if(OWLDatatypeProperty.class.isInstance(prop)){
@@ -129,20 +221,35 @@
 //		}
 		
 		
+		
 		RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang);
-		eventInstance.setPropertyValue(prop, langLiteral);
-
+		individual.setPropertyValue(prop, langLiteral);
 		
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),escapeRDFLit(langLiteral.getString()),lang);
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
 	}
 
-	public Object getRelatedIndividual(OWLIndividual cardInd,
+	/**
+	 * Returns the individual that is connected to the source individual via the given property.
+	 * @param sourceIndividual
+	 * @param propertyName
+	 * @return
+	 */
+	public Object getRelatedIndividual(OWLIndividual sourceIndividual,
 			String propertyName) {
 		RDFProperty prop = owlModel.getRDFProperty(propertyName);
 		if (prop == null) {
 			logger.debug("Property does not exist:" + propertyName);
 		}
 
-		Object value = cardInd.getPropertyValue(prop);
+		Object value = sourceIndividual.getPropertyValue(prop);
 		return value;
 	}
 
@@ -263,32 +370,75 @@
 	}
 	
 	public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) {
-		OWLIndividual timeSpan = createInstance("DatesDocument");
+		// A date is described by a timespan which is described by an appellation
 		
-			
+		//TODO: the human-readable dates belong into the time span, together with qualifiers such as "approximately" etc.
+		// i.e. the parentheses around a date that signal "approximately" belong into the description of the
+		// qualifiers in the time span
+		// the exact parts go into DuomoDate_Appellation
+		// for now I write into "has_readable" here exactly what is in the text
+		// this still has to be analysed
+		// e.g. 1432 gennaio 9 --> should be turned into a machine-readable model
+		// and 1432 gennaio (9) has to be entered as 1432 gennaio 9 in DuomoDate_Appellation
+		// and then described more precisely with P79 and P80 in DuomoDate_TimeSpan, i.e. P79 then holds
+		// "approximately"
+		// in addition "POST" and "ANTE" from the text have to be incorporated into the model.
 		
-		setProperty(timeSpan, "has_readable_date", dateDcStart);
+		
+		OWLIndividual timeSpan = createInstance("DuomoDate_TimeSpan");
+		
+		OWLIndividual date= createInstance("DuomoDate_Appellation");	
+		
+		setProperty(date, "rdfs:label", dateDcStart);
+		//setProperty(timeSpan, "has_readable_date", dateDcStart);
+		
+		setProperty(timeSpan,"is_identified_by_Date",date);
 		
 		if(!(dateDcEnd==null || dateDcEnd.equals(""))){
-			setProperty(timeSpan, "has_readable_to_date", dateDcEnd);
+			OWLIndividual toDate= createInstance("DuomoDate_Appellation");
+			setProperty(toDate, "rdfs:label", dateDcEnd);
+			//setProperty(timeSpan, "has_readable_toDate", dateDcEnd);
+			setProperty(timeSpan,"is_identified_by_toDate",toDate);
 		} else {
 			dateDcEnd=dateDcStart;
 		}
 		
 		OWLIndividual timeSpanIdentifier = createInstance("Identifier");
-		setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd);
+		setProperty(timeSpanIdentifier,"rdfs:label",dateDcStart+"-"+dateDcEnd);
 		
 		setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
 		
 		return timeSpan;
 	}
+	
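+	// Sketch of the TODO above (commented out; the P79/P80 property local names are assumptions following the
+	// crm:Pxx_ naming used elsewhere in this ontology): for an approximate date such as "1432 gennaio (9)"
+	// the qualifiers could be attached to the time span roughly like this:
+	//
+	//	OWLIndividual span = createTimeSpan("1432 gennaio 9", null);
+	//	setProperty(span, "crm:P79_beginning_is_qualified_by", "approximately");
+	//	setProperty(span, "crm:P80_end_is_qualified_by", "approximately");
+	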
+//	public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) {
+//		OWLIndividual timeSpan = createInstance("DatesDocument");
+//		
+//			
+//		
+//		setProperty(timeSpan, "has_readable_date", dateDcStart);
+//		
+//		if(!(dateDcEnd==null || dateDcEnd.equals(""))){
+//			setProperty(timeSpan, "has_readable_to_date", dateDcEnd);
+//		} else {
+//			dateDcEnd=dateDcStart;
+//		}
+//		
+//		OWLIndividual timeSpanIdentifier = createInstance("Identifier");
+//		setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd);
+//		
+//		setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
+//		
+//		return timeSpan;
+//	}
+
 
 	public OWLIndividual getIndividualByReadableId(String className,String identifier){
-		return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "has_readable_id", identifier, true);
+		return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "rdfs:label", identifier, true);
 	}
 
 	public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){
-		return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "has_readable_id", identifier,subclassedIdentifier);
+		return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "rdfs:label", identifier,subclassedIdentifier);
 	}
 
 	public String getClassNameFromTypeId(String typeId) {
@@ -333,11 +483,14 @@
 
 	public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance,
 			String classNameIdentifier, String identifier,boolean followSubclasses) {
+		
+		identifier=org.apache.commons.lang.StringUtils.strip(identifier);
+		
 		OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses);
 		if(ind==null){
 			ind = createInstance(classNameInstance);
 			OWLIndividual identifierInd = createInstance(classNameIdentifier);
-			setProperty(identifierInd, "has_readable_id", identifier);
+			setProperty(identifierInd, "rdfs:label", identifier);
 			try {
 				fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n");
 				fh.flush();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/duomo/stanford/Analyse.java	Thu Jun 21 17:08:22 2012 +0200
@@ -0,0 +1,182 @@
+package de.mpiwg.dwinter.duomo.stanford;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import edu.stanford.nlp.io.EncodingPrintWriter.out;
+import edu.stanford.nlp.ling.CyclicCoreLabel;
+import edu.stanford.nlp.ling.DocumentReader;
+import edu.stanford.nlp.ling.HasWord;
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
+import edu.stanford.nlp.process.DocumentPreprocessor;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+import edu.stanford.nlp.trees.GrammaticalStructure;
+import edu.stanford.nlp.trees.GrammaticalStructureFactory;
+import edu.stanford.nlp.trees.PennTreebankLanguagePack;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.TreebankLanguagePack;
+import edu.stanford.nlp.trees.TypedDependency;
+
+public class Analyse {
+
+	public void analyse(String filename) throws IOException {
+
+		LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
+		// This option shows loading, sentence-segmenting and tokenizing
+		// a file using DocumentPreprocessor
+		TreebankLanguagePack tlp = new PennTreebankLanguagePack();
+		GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
+		// You could also create a tokenizer here (as below) and pass it
+		// to DocumentPreprocessor
+
+		int count=0;
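+		// tuple:     "relation \t governor \t dependent" (prep/conj relations only) -> number of occurrences
+		// tupleLong: additionally includes reln.getSpecific() (the concrete preposition/conjunction) -> number of occurrences
+		// words:     lower-cased token -> number of occurrences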
+		Map<String,Integer> tuple = new HashMap<String,Integer>(); 
+		Map<String,Integer> tupleLong = new HashMap<String,Integer>(); 
+		Map<String,Integer> words = new HashMap<String,Integer>(); 
+
+		FileInputStream fstream = new FileInputStream(filename);
+		// Get the object of DataInputStream
+		DataInputStream in = new DataInputStream(fstream);
+		BufferedReader br = new BufferedReader(new InputStreamReader(in));
+		String strLine;
+		//Read File Line By Line
+		while ((strLine = br.readLine()) != null)   {
+
+			// correct line needs to be completed to a sentence
+			strLine=strLine.replace("\"", "");
+			strLine="This is a "+strLine;
+
+
+			Reader dr = DocumentReader.getReader(strLine);
+
+
+
+			for (List<HasWord> sentence : new DocumentPreprocessor(dr)) {
+				Tree parse = lp.apply(sentence);
+				//parse.pennPrint();
+				//System.out.println();
+
+				for (HasWord word: sentence)
+				{
+					Word wd = (Word)word;
+
+					String st= wd.value().toLowerCase();
+
+					if (words.containsKey(st)){
+						words.put(st, words.get(st)+1);
+					} else {
+						words.put(st, 1);
+					}
+
+				}
+
+
+				GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
+				Collection tdl = gs.typedDependenciesCCprocessed(true);
+
+				for (Object t: tdl){
+					if (TypedDependency.class.isInstance(t)){
+
+
+						TypedDependency td = (TypedDependency)t;
+
+						GrammaticalRelation reln = td.reln();
+						if (reln.getShortName().equals("prep") || reln.getShortName().equals("conj") ){
+
+							String st = reln.getShortName()
+									+"\t";
+
+							st +=td.gov().label().value()+"\t";
+
+							st+=td.dep().label().value();
+
+							st=st.toLowerCase();
+							if (tuple.containsKey(st)){
+								tuple.put(st, tuple.get(st)+1);
+							} else {
+								tuple.put(st, 1);
+							}
+
+							st = reln.getShortName()+"\t"+reln.getSpecific()+"\t";
+
+							st +=td.gov().label().value()+"\t";
+
+							st+=td.dep().label().value();
+
+							st=st.toLowerCase();
+
+							if (tupleLong.containsKey(st)){
+								tupleLong.put(st, tupleLong.get(st)+1);
+							} else {
+								tupleLong.put(st, 1);
+							}
+
+						}
+
+					}
+
+				}
+
+				//System.out.println(tdl);
+				//System.out.println();
+				count++;
+				System.out.println(count);
+
+
+			}
+			//if (count > 5)
+			//	  break;
+		}
+		System.out.println(tuple);
+		System.out.println(tupleLong);
+
+		FileWriter fw = new FileWriter("/tmp/tuple");
+
+		for (String key : tuple.keySet()){
+			fw.write(key+"\t"+String.valueOf(tuple.get(key))+"\n");
+		}
+		fw.close();
+
+
+		fw = new FileWriter("/tmp/tupleLong");
+
+		for (String key : tupleLong.keySet()){
+			fw.write(key+"\t"+String.valueOf(tupleLong.get(key))+"\n");
+		}
+		fw.close();
+
+		fw = new FileWriter("/tmp/words");
+
+		for (String key : words.keySet()){
+			fw.write(key+"\t"+String.valueOf(words.get(key))+"\n");
+		}
+		fw.close();
+
+	}
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) {
+		Analyse a = new Analyse();
+		try {
+			a.analyse("/tmp/reges.csv");
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	Thu Jun 21 17:08:22 2012 +0200
@@ -0,0 +1,210 @@
+// Analyse entries retrieved from the Virtuoso store, e.g.
+// "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans."
+// select distinct * where { {?x duomo:has_reges ?y} FILTER(lang(?y)="en")}
+
+
+package de.mpiwg.dwinter.duomo.stanford;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import edu.stanford.nlp.io.EncodingPrintWriter.out;
+import edu.stanford.nlp.ling.CyclicCoreLabel;
+import edu.stanford.nlp.ling.DocumentReader;
+import edu.stanford.nlp.ling.HasWord;
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
+import edu.stanford.nlp.process.DocumentPreprocessor;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+import edu.stanford.nlp.trees.GrammaticalStructure;
+import edu.stanford.nlp.trees.GrammaticalStructureFactory;
+import edu.stanford.nlp.trees.PennTreebankLanguagePack;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.TreebankLanguagePack;
+import edu.stanford.nlp.trees.TypedDependency;
+
+public class AnalyseWithEvents {
+
+	public void analyse(String filename) throws IOException {
+
+		LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
+		// This option shows loading, sentence-segmenting and tokenizing
+		// a file using DocumentPreprocessor
+		TreebankLanguagePack tlp = new PennTreebankLanguagePack();
+		GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
+		// You could also create a tokenizer here (as below) and pass it
+		// to DocumentPreprocessor
+
+		int count=0;
+		Map<String,List<String>> tuple = new HashMap<String,List<String>>(); 
+		Map<String,List<String>> tupleLong = new HashMap<String,List<String>>(); 
+		Map<String,List<String>> words = new HashMap<String,List<String>>(); 
+
+		FileInputStream fstream = new FileInputStream(filename);
+		// Get the object of DataInputStream
+		DataInputStream in = new DataInputStream(fstream);
+		BufferedReader br = new BufferedReader(new InputStreamReader(in));
+		String strLineFull;
+		//Read File Line By Line
+		while ((strLineFull = br.readLine()) != null)   {
+
+			// correct line needs to be completed to a sentence
+			String[] splitted = strLineFull.split(",");
+			
+			
+			// the line has the form:  "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans."
+			
+			String strLine=splitted[1];
+			String recordURI = splitted[0];
+			strLine=strLine.replace("\"", "");
+			strLine="This is a "+strLine;
+
+
+			Reader dr = DocumentReader.getReader(strLine);
+
+
+
+			for (List<HasWord> sentence : new DocumentPreprocessor(dr)) {
+				Tree parse = lp.apply(sentence);
+				//parse.pennPrint();
+				//System.out.println();
+				
+				for (HasWord word: sentence)
+				{
+					Word wd = (Word)word;
+
+					String st= wd.value().toLowerCase();
+					
+					if (words.containsKey(st)){
+						words.get(st).add(recordURI);
+					} else {
+						List<String> ls =new ArrayList<String>(); 
+						ls.add(recordURI);
+						words.put(st, ls);
+					}
+
+				}
+
+
+				GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
+				Collection tdl = gs.typedDependenciesCCprocessed(true);
+				
+				for (Object t: tdl){
+					if (TypedDependency.class.isInstance(t)){
+
+
+						TypedDependency td = (TypedDependency)t;
+
+						GrammaticalRelation reln = td.reln();
+						if (reln.getShortName().equals("prep") || reln.getShortName().equals("conj") ){
+
+							String st = reln.getShortName()
+									+"\t";
+
+							st +=td.gov().label().value()+"\t";
+
+							st+=td.dep().label().value();
+
+							st=st.toLowerCase();
+							
+							if (tuple.containsKey(st)){
+								tuple.get(st).add(recordURI);
+							} else {
+								List<String> ls =new ArrayList<String>(); 
+								ls.add(recordURI);
+								tuple.put(st, ls);
+							}
+							
+
+							st = reln.getShortName()+"\t"+reln.getSpecific()+"\t";
+
+							st +=td.gov().label().value()+"\t";
+
+							st+=td.dep().label().value();
+
+							st=st.toLowerCase();
+
+							if (tupleLong.containsKey(st)){
+								tupleLong.get(st).add(recordURI);
+							} else {
+								List<String> ls =new ArrayList<String>(); 
+								ls.add(recordURI);
+								tupleLong.put(st, ls);
+							}
+						
+
+						}
+
+					}
+
+				}
+
+				//System.out.println(tdl);
+				//System.out.println();
+				count++;
+				System.out.println(count);
+
+
+			}
+			//if (count > 5)
+			//	  break;
+		}
+		System.out.println(tuple);
+		System.out.println(tupleLong);
+
+		FileWriter fw = new FileWriter("/tmp/tuple");
+
+		for (String key : tuple.keySet()){
+			List<String> val = tuple.get(key);	
+			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+			
+		}
+		fw.close();
+
+
+		fw = new FileWriter("/tmp/tupleLong");
+
+		for (String key : tupleLong.keySet()){
+			List<String> val = tupleLong.get(key);
+			
+			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+		}
+		fw.close();
+
+		fw = new FileWriter("/tmp/words");
+
+		for (String key : words.keySet()){
+			
+			List<String> val = words.get(key);	
+			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+		}
+		fw.close();
+
+	}
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) {
+		AnalyseWithEvents a = new AnalyseWithEvents();
+		try {
+			a.analyse("/tmp/reges.csv");
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/duomo/stanford/ParserDemo.java	Thu Jun 21 17:08:22 2012 +0200
@@ -0,0 +1,112 @@
+package de.mpiwg.dwinter.duomo.stanford;
+
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+import edu.stanford.nlp.objectbank.TokenizerFactory;
+import edu.stanford.nlp.process.CoreLabelTokenFactory;
+import edu.stanford.nlp.process.DocumentPreprocessor;
+import edu.stanford.nlp.process.PTBTokenizer;
+import edu.stanford.nlp.ling.CoreLabel;  
+import edu.stanford.nlp.ling.DocumentReader;
+import edu.stanford.nlp.ling.HasWord;  
+import edu.stanford.nlp.trees.*;
+import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
+
+class ParserDemo {
+
+  public static void main(String[] args) {
+    LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
+    if (args.length > 0) {
+      try {
+		demoDP(lp, args[0]);
+	} catch (IOException e) {
+		// TODO Auto-generated catch block
+		e.printStackTrace();
+	}
+    } else {
+      demoAPI(lp);
+    }
+  }
+
+  public static void demoDP(LexicalizedParser lp, String filename) throws IOException {
+    // This option shows loading, sentence-segmenting and tokenizing
+    // a file using DocumentPreprocessor
+    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
+    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
+    // You could also create a tokenizer here (as below) and pass it
+    // to DocumentPreprocessor
+    
+    FileInputStream fstream = new FileInputStream(filename);
+    // Get the object of DataInputStream
+    DataInputStream in = new DataInputStream(fstream);
+    BufferedReader br = new BufferedReader(new InputStreamReader(in));
+    String strLine;
+    //Read File Line By Line
+    while ((strLine = br.readLine()) != null)   {
+    
+    	// correct line needs to be completed to a sentence
+    	strLine=strLine.replace("\"", "");
+    	strLine="This is a "+strLine;
+    			
+    			
+    	Reader dr = DocumentReader.getReader(strLine);
+    	
+	    for (List<HasWord> sentence : new DocumentPreprocessor(dr)) {
+	      Tree parse = lp.apply(sentence);
+	      parse.pennPrint();
+	      System.out.println();
+	      
+	      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
+	      Collection tdl = gs.typedDependenciesCCprocessed(true);
+	      System.out.println(tdl);
+	      System.out.println();
+	    }
+    }
+  }
+
+  public static void demoAPI(LexicalizedParser lp) {
+    // This option shows parsing a list of correctly tokenized words
+    String[] sent = { "This", "is", "an", "easy", "sentence", "." };
+    List<CoreLabel> rawWords = new ArrayList<CoreLabel>();
+    for (String word : sent) {
+      CoreLabel l = new CoreLabel();
+      l.setWord(word);
+      rawWords.add(l);
+    }
+    Tree parse = lp.apply(rawWords);
+    parse.pennPrint();
+    System.out.println();
+
+    // This option shows loading and using an explicit tokenizer
+    String sent2 = "This is another sentence.";
+    TokenizerFactory<CoreLabel> tokenizerFactory = 
+      PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
+    List<CoreLabel> rawWords2 = 
+      tokenizerFactory.getTokenizer(new StringReader(sent2)).tokenize();
+    parse = lp.apply(rawWords2);
+
+    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
+    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
+    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
+    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
+    System.out.println(tdl);
+    System.out.println();
+
+    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
+    tp.printTree(parse);
+  }
+
+  private ParserDemo() {} // static methods only
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/duomo/stanford/ParserDemo2.java	Thu Jun 21 17:08:22 2012 +0200
@@ -0,0 +1,72 @@
+package de.mpiwg.dwinter.duomo.stanford;
+
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.*;
+
+import edu.stanford.nlp.ling.HasWord;
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.ling.Sentence;
+import edu.stanford.nlp.process.DocumentPreprocessor;
+import edu.stanford.nlp.process.Tokenizer;
+import edu.stanford.nlp.trees.*;
+import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
+
+class ParserDemo2 {
+
+  /** Usage: ParserDemo2 [[grammar] textFile] */
+  public static void main(String[] args) throws IOException {
+    String grammar = args.length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
+    String[] options = { "-maxLength", "80", "-retainTmpSubcategories" };
+    LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options);
+    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
+    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
+
+
+    Iterable<List<? extends HasWord>> sentences;
+    if (args.length > 1) {
+      DocumentPreprocessor dp = new DocumentPreprocessor(args[1]);
+      List<List<? extends HasWord>> tmp = 
+        new ArrayList<List<? extends HasWord>>();
+      for (List<HasWord> sentence : dp) {
+        tmp.add(sentence);
+      }
+      sentences = tmp;
+    } else {
+      // Showing tokenization and parsing in code a couple of different ways.
+      String[] sent = { "This", "is", "an", "easy", "sentence", "." };
+      List<HasWord> sentence = new ArrayList<HasWord>();
+      for (String word : sent) {
+        sentence.add(new Word(word));
+      }
+      String sent2 = ("This is a slightly longer and more complex " +
+                      "sentence requiring tokenization.");
+      Tokenizer<? extends HasWord> toke = 
+        tlp.getTokenizerFactory().getTokenizer(new StringReader(sent2));
+      List<? extends HasWord> sentence2 = toke.tokenize();
+      List<List<? extends HasWord>> tmp = 
+        new ArrayList<List<? extends HasWord>>();
+      tmp.add(sentence);
+      tmp.add(sentence2);
+      sentences = tmp;
+    }
+
+    for (List<? extends HasWord> sentence : sentences) {
+      Tree parse = lp.apply(sentence);
+      parse.pennPrint();
+      System.out.println();
+      System.out.println(parse.taggedYield());
+      System.out.println();
+
+      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
+      Collection tdl = gs.typedDependenciesCCprocessed(true);
+      System.out.println(tdl);
+      System.out.println();
+    }
+
+    String sent3 = "This is one last test!";
+    lp.apply(sent3).pennPrint();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/dwinter/duomo/stanford/TokenWithEvent.java	Thu Jun 21 17:08:22 2012 +0200
@@ -0,0 +1,137 @@
+package de.mpiwg.dwinter.duomo.stanford;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import edu.stanford.nlp.io.EncodingPrintWriter.out;
+import edu.stanford.nlp.ling.CyclicCoreLabel;
+import edu.stanford.nlp.ling.DocumentReader;
+import edu.stanford.nlp.ling.HasWord;
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
+import edu.stanford.nlp.process.DocumentPreprocessor;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+import edu.stanford.nlp.trees.GrammaticalStructure;
+import edu.stanford.nlp.trees.GrammaticalStructureFactory;
+import edu.stanford.nlp.trees.PennTreebankLanguagePack;
+import edu.stanford.nlp.trees.PennTreebankTokenizer;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.TreebankLanguagePack;
+import edu.stanford.nlp.trees.TypedDependency;
+import edu.stanford.nlp.trees.international.negra.NegraPennTokenizer;
+
+public class TokenWithEvent {
+
+	public void analyse(String filename) throws IOException {
+
+		LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
+		// This option shows loading, sentence-segmenting and tokenizing
+		// a file using DocumentPreprocessor
+		TreebankLanguagePack tlp = new PennTreebankLanguagePack();
+		GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
+		// You could also create a tokenizer here (as below) and pass it
+		// to DocumentPreprocessor
+
+		int count=0;
+		
+		
+		Map<String,List<String>> words = new HashMap<String,List<String>>(); 
+
+		FileInputStream fstream = new FileInputStream(filename);
+		// Get the object of DataInputStream
+		DataInputStream in = new DataInputStream(fstream);
+		BufferedReader br = new BufferedReader(new InputStreamReader(in));
+		String strLineFull;
+		//Read File Line By Line
+		while ((strLineFull = br.readLine()) != null)   {
+
+			// correct line needs to be completed to a sentence
+			
+			
+			String[] splitted = strLineFull.split("\",\"");
+			String strLine=splitted[1];
+			String recordURI = splitted[0];
+			strLine=strLine.replace("\"", "");
+			//strLine="This is a "+strLine;
+
+
+			Reader dr = DocumentReader.getReader(strLine);
+
+			//PennTreebankTokenizer tk = new PennTreebankTokenizer(dr);
+			NegraPennTokenizer tk = new NegraPennTokenizer(dr);
+			
+			while (tk.hasNext()){
+				
+			
+					String t = tk.next();
+
+					String st= t.toLowerCase();
+					st= st.replace(".", "");
+					st= st.replace(",", "");
+					st= st.replace(":","");
+					st= st.replace(";","");
+					st= st.replace("!","");
+				
+					if (st.length()<2)
+						continue;
+	
+					if (words.containsKey(st)){
+						words.get(st).add(recordURI);
+					} else {
+						List<String> ls =new ArrayList<String>(); 
+						ls.add(recordURI);
+						words.put(st, ls);
+					}
+
+				
+
+
+				//System.out.println(tdl);
+				//System.out.println();
+				count++;
+				System.out.println(count);
+
+
+			}
+			//if (count > 100)
+			//	  break;
+		}
+	
+
+
+		FileWriter fw = new FileWriter("/tmp/words2");
+
+		for (String key : words.keySet()){
+			List<String> val = words.get(key);	
+			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+		}
+		fw.close();
+
+	}
+	/**
+	 * @param args
+	 */
+	public static void main(String[] args) {
+		TokenWithEvent a = new TokenWithEvent();
+		try {
+			a.analyse("/tmp/reges.csv");
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
+
+	}
+
+}