changeset 3:1e4835334837

(none)
author jurzua
date Wed, 29 Oct 2014 13:29:59 +0000
parents 0e0082e1e12f
children 8ce07918ec8a
files docs/RDFGenerator.java docs/RDFTest.java docs/readme.txt
diffstat 3 files changed, 439 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/RDFGenerator.java	Wed Oct 29 13:29:59 2014 +0000
@@ -0,0 +1,284 @@
+package org.mpi.openmind.scripts;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.jrdf.JRDFFactory;
+import org.jrdf.SortedMemoryJRDFFactory;
+import org.jrdf.graph.Graph;
+import org.jrdf.graph.GraphElementFactory;
+import org.jrdf.graph.Resource;
+import org.jrdf.writer.RdfWriter;
+import org.jrdf.writer.Writer;
+import org.jrdf.writer.ntriples.NTriplesWriterImpl;
+import org.mpi.openmind.cache.WrapperService;
+import org.mpi.openmind.repository.bo.Attribute;
+import org.mpi.openmind.repository.bo.Entity;
+import org.mpi.openmind.repository.bo.Relation;
+import org.mpi.openmind.repository.services.ServiceRegistry;
+
+public class RDFGenerator {
+
+	//private OntologyService os;
+	private WrapperService os;
+	private String fileName;
+	private JRDFFactory jrdfFactory;
+	private Graph graph;
+	private GraphElementFactory elementFactory;
+	
+	public String mpiwg = "http://www.mpiwg.de/ismi/";
+	
+	private Map<String, URI> attURIMap = new HashMap<String, URI>();
+	private Map<String, URI> relURIMap = new HashMap<String, URI>();
+	
+	
+	/**
+	 * Creates a generator that loads entities through the given service and keeps
+	 * the graph it builds in memory until {@link #execute(long...)} writes it out.
+	 *
+	 * @param os service used to load entities and their content
+	 * @param fileName path of the file the graph is written to
+	 */
+	public RDFGenerator(WrapperService os, String fileName){
+		JRDFFactory factory = SortedMemoryJRDFFactory.getFactory();
+		Graph newGraph = factory.getGraph();
+
+		this.os = os;
+		this.fileName = fileName;
+		this.jrdfFactory = factory;
+		this.graph = newGraph;
+		this.elementFactory = newGraph.getElementFactory();
+	}
+	
+	/**
+	 * Builds the graph for a set of texts and writes it as RDF/XML to {@code fileName}.
+	 * For every text, each WITNESS that is the source of an "is_exemplar_of" relation
+	 * pointing to it is converted (including its codex, collection, repository and place)
+	 * and linked to the text resource. Progress is printed to standard output and the JVM
+	 * is terminated with {@code System.exit(0)} after the file has been written.
+	 *
+	 * @param texts ids of the texts to export; when none are given the texts come from
+	 *              {@code os.getLightweightAssertions("TEXT", null, 100)}
+	 * @throws Exception propagated from entity loading, resource creation or the writer
+	 */
+	public void execute(long ... texts) throws Exception{
+		List<Entity> textList;
+		if(texts.length == 0){
+			textList = os.getLightweightAssertions("TEXT", null, 100);
+		}else{
+			textList = new ArrayList<Entity>();
+			for(long textId : texts){
+				textList.add(os.getLightweightEntityById(textId));
+			}
+		}
+
+		int count = 0;
+		for(Entity listed : textList){
+			System.out.println(count + ")\t" + listed.toString());
+
+			// Lightweight entities carry no attributes/relations yet; load the full content.
+			Entity text = listed.isLightweight() ? os.getEntityContent(listed) : listed;
+			Resource textResource = createResource(text);
+
+			for(Relation rel : text.getTargetRelations("is_exemplar_of", "WITNESS")){
+				Entity witness = os.getEntityById(rel.getSourceId());
+				Resource witnessResource = createWitnessResource(witness);
+				witnessResource.addValue(getRelURI("is_exemplar_of"), textResource);
+			}
+			System.out.println();
+			count++;
+		}
+
+		// Only the serialization step is timed.
+		long start = System.currentTimeMillis();
+		Writer.writeRdfXml(new File(fileName), graph);
+		long elapsed = System.currentTimeMillis() - start;
+
+		System.out.println("Time writting [ms]=" + elapsed);
+		System.out.println("OK");
+		System.exit(0);
+	}
+	
+	public static String tryWriteNTriple(Graph graph) throws Exception {
+		OutputStream output = new OutputStream()
+	    {
+	        private StringBuilder string = new StringBuilder();
+	        @Override
+	        public void write(int b) throws IOException {
+	            this.string.append((char) b );
+	        }
+
+	        //Netbeans IDE automatically overrides this toString()
+	        public String toString(){
+	            return this.string.toString();
+	        }
+	    };
+	    try {
+	    	final RdfWriter writer = new NTriplesWriterImpl();
+	    	try {
+	    		writer.write(graph, output);
+	    	} finally {
+	    		writer.close();
+	    	}
+	    }finally {
+	    	output.close();
+	    }
+	    return output.toString();
+	}
+	
+	/**
+	 * Creates the resource of a witness and links it through "is_part_of" to the resource
+	 * of every CODEX targeted by one of its "is_part_of" source relations.
+	 * Prints "W" as progress marker.
+	 *
+	 * @param witness the witness entity
+	 * @return the resource created for the witness
+	 * @throws Exception propagated from entity loading or resource creation
+	 */
+	private Resource createWitnessResource(Entity witness) throws Exception{
+		System.out.print("W");
+		Resource witnessResource = createResource(witness);
+
+		for(Relation rel : witness.getSourceRelations("is_part_of", "CODEX")){
+			URI isPartOf = getRelURI("is_part_of");
+			Entity codex = os.getEntityById(rel.getTargetId());
+			Resource codexResource = createCodexResource(codex);
+			witnessResource.addValue(isPartOf, codexResource);
+		}
+		return witnessResource;
+	}
+	
+	/**
+	 * Creates the resource of a codex and links it through "is_part_of" to the resource
+	 * of every COLLECTION targeted by one of its "is_part_of" source relations.
+	 * Prints "C" as progress marker.
+	 *
+	 * @param codex the codex entity
+	 * @return the resource created for the codex
+	 * @throws Exception propagated from entity loading or resource creation
+	 */
+	private Resource createCodexResource(Entity codex) throws Exception{
+		System.out.print("C");
+		Resource codexResource = createResource(codex);
+
+		for(Relation rel : codex.getSourceRelations("is_part_of", "COLLECTION")){
+			URI isPartOf = getRelURI("is_part_of");
+			Entity collection = os.getEntityById(rel.getTargetId());
+			Resource collectionResource = createCollectionResource(collection);
+			codexResource.addValue(isPartOf, collectionResource);
+		}
+		return codexResource;
+	}
+	
+	/**
+	 * Creates the resource of a collection and links it through "is_part_of" to the resource
+	 * of every REPOSITORY targeted by one of its "is_part_of" source relations.
+	 * Prints "L" as progress marker.
+	 *
+	 * @param collection the collection entity
+	 * @return the resource created for the collection
+	 * @throws Exception propagated from entity loading or resource creation
+	 */
+	private Resource createCollectionResource(Entity collection) throws Exception{
+		System.out.print("L");
+		Resource collectionResource = createResource(collection);
+
+		for(Relation rel : collection.getSourceRelations("is_part_of", "REPOSITORY")){
+			URI isPartOf = getRelURI("is_part_of");
+			Entity repository = os.getEntityById(rel.getTargetId());
+			Resource repositoryResource = createRepositoryResource(repository);
+			collectionResource.addValue(isPartOf, repositoryResource);
+		}
+		return collectionResource;
+	}
+	
+	/**
+	 * Creates the resource of a repository and links it through "is_in" to the resource
+	 * of every PLACE targeted by one of its "is_in" source relations.
+	 * Prints "R" for the repository and "P" for each place as progress markers.
+	 *
+	 * @param repository the repository entity
+	 * @return the resource created for the repository
+	 * @throws Exception propagated from entity loading or resource creation
+	 */
+	private Resource createRepositoryResource(Entity repository) throws Exception{
+		System.out.print("R");
+		Resource repositoryResource = createResource(repository);
+
+		for(Relation rel : repository.getSourceRelations("is_in", "PLACE")){
+			URI isIn = getRelURI("is_in");
+			Entity place = os.getEntityById(rel.getTargetId());
+			Resource placeResource = createResource(place);
+			repositoryResource.addValue(isIn, placeResource);
+			System.out.print("P");
+		}
+		return repositoryResource;
+	}
+	
+	
+	/**
+	 * Creates the resource that represents an entity. A lightweight entity is first
+	 * replaced by its full content; every non-empty attribute is then added as a value of
+	 * the resource, and the entity's object class is added under the rdf:type predicate.
+	 *
+	 * @param entity the entity to convert
+	 * @return the resource identified by {@code mpiwg + objectClass + "/" + id}
+	 * @throws Exception propagated from entity loading or resource creation
+	 */
+	private Resource createResource(Entity entity) throws Exception{
+		Entity full = entity.isLightweight() ? os.getEntityContent(entity) : entity;
+
+		String location = mpiwg + full.getObjectClass() + "/" + full.getId();
+		Resource resource = elementFactory.createResource(URI.create(location));
+		att2Literals(full, resource);
+
+		// The type of the resource is the object class of the entity.
+		resource.addValue(getRDFType(), full.getObjectClass());
+		return resource;
+	}
+	
+	private URI rdfType;
+	private URI edmType;
+
+	/**
+	 * Returns the URI held in {@code rdfType}, initialising it to the rdf:type URI
+	 * when the field is still unset.
+	 *
+	 * @return the cached URI; {@code null} if creating it failed with a URISyntaxException
+	 */
+	private URI getRDFType(){
+		if(rdfType != null){
+			return rdfType;
+		}
+		try {
+			rdfType = new URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
+		} catch (URISyntaxException e) {
+			e.printStackTrace();
+		}
+		return rdfType;
+	}
+	
+	/**
+	 * Returns the EDM type predicate URI, creating and caching it on first use.
+	 * The value is kept in the {@code edmType} field so that it neither overwrites nor is
+	 * masked by the rdf:type URI cached in {@code rdfType} by {@code getRDFType()}.
+	 *
+	 * @return the EDM type URI; {@code null} if creating it failed with a URISyntaxException
+	 */
+	private URI getEDMType(){
+		if(edmType == null){
+			try {
+				edmType = new URI("http://www.europeana.eu/schemas/edm/type");
+			} catch (URISyntaxException e) {
+				e.printStackTrace();
+			}
+		}
+		return edmType;
+	}
+	
+	
+	
+	
+	/**
+	 * Adds every attribute of the entity that has a non-empty value as a value of the
+	 * resource; the predicate is the URI derived from the attribute's object class.
+	 *
+	 * @param entity the entity whose attributes are read
+	 * @param resource the resource that receives the values
+	 */
+	private void att2Literals(Entity entity, Resource resource){
+		for(Attribute att : entity.getAttributes()){
+			String value = att.getValue();
+			if(!StringUtils.isNotEmpty(value)){
+				continue;
+			}
+			URI predicate = getAttURI(att.getObjectClass());
+			resource.addValue(predicate, value);
+		}
+	}
+	
+	
+	/**
+	 * Returns the predicate URI of a relation name, i.e. {@code mpiwg} followed by the
+	 * camel-case form of the name. URIs are cached per relation name.
+	 *
+	 * @param relName relation name as used in the repository, e.g. "is_part_of"
+	 * @return the URI for that relation
+	 */
+	private URI getRelURI(String relName){
+		URI cached = relURIMap.get(relName);
+		if(cached != null){
+			return cached;
+		}
+		URI created = URI.create(mpiwg + attNameToURIName(relName));
+		relURIMap.put(relName, created);
+		return created;
+	}
+	
+	/**
+	 * Returns the predicate URI of an attribute name, i.e. {@code mpiwg} followed by the
+	 * camel-case form of the name. URIs are cached per attribute name.
+	 *
+	 * @param attName attribute name as used in the repository
+	 * @return the URI for that attribute
+	 */
+	private URI getAttURI(String attName){
+		URI cached = attURIMap.get(attName);
+		if(cached != null){
+			return cached;
+		}
+		URI created = URI.create(mpiwg + attNameToURIName(attName));
+		attURIMap.put(attName, created);
+		return created;
+	}
+	
+	/**
+	 * Converts an underscore-separated name to lower camel case, e.g. "is_part_of"
+	 * becomes "isPartOf". The first segment is kept unchanged and the first character of
+	 * each following segment is upper-cased. Empty segments (from consecutive or leading
+	 * underscores, e.g. "a__b") are skipped; previously they caused a
+	 * StringIndexOutOfBoundsException.
+	 *
+	 * @param attName the attribute or relation name
+	 * @return the camel-case name, or {@code attName} itself when splitting on "_" yields
+	 *         at most one segment
+	 */
+	private static String attNameToURIName(String attName){
+		String[] words = attName.split("_");
+		if(words.length <= 1){
+			return attName;
+		}
+
+		StringBuilder sb = new StringBuilder(words[0]);
+		for(int i = 1; i < words.length; i++){
+			String word = words[i];
+			if(word.length() == 0){
+				// "a__b" splits into ["a", "", "b"]; there is nothing to capitalise here.
+				continue;
+			}
+			sb.append(Character.toUpperCase(word.charAt(0)));
+			sb.append(word.substring(1));
+		}
+		return sb.toString();
+	}
+	
+	
+	
+	public static void main(String[] args) {
+		//System.out.println(attNameToURIName("diagrams_and_illustrations"));
+		//System.out.println(attNameToURIName("hola"));
+		//System.out.println(attNameToURIName("diagrams_and"));
+		
+		ServiceRegistry services = new ServiceRegistry();
+		RDFGenerator rdfGenerator = 
+			new RDFGenerator(
+					services.getWrapper(), 
+					"/Users/jurzua/Projects/DM2E/Silk/ismi/ismi_data_source.xml");
+					//"/Users/jurzua/Projects/workspace/EDM/ISMI/rdf/rdf_text_id_415640.xml");
+		//rdfGenerator.execute(415640);
+		try {
+			rdfGenerator.execute(415640, 447023, 40979, 458950, 202603);
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/RDFTest.java	Wed Oct 29 13:29:59 2014 +0000
@@ -0,0 +1,72 @@
+package org.mpi.openmind.rdf;
+
+import java.io.File;
+import java.net.URI;
+
+import org.jrdf.JRDFFactory;
+import org.jrdf.SortedMemoryJRDFFactory;
+import org.jrdf.graph.AnyTriple;
+import org.jrdf.graph.Graph;
+import org.jrdf.graph.GraphElementFactory;
+import org.jrdf.graph.Resource;
+import org.jrdf.graph.Triple;
+import org.jrdf.graph.TripleFactory;
+import org.jrdf.parser.RdfReader;
+import org.jrdf.util.ClosableIterator;
+import org.jrdf.writer.RdfWriter;
+import org.jrdf.writer.Writer;
+import org.springframework.web.servlet.view.RedirectView;
+
+/**
+ * Scratch program that builds a small JRDF graph by hand and writes it as RDF/XML.
+ */
+public class RDFTest {
+
+	public static String mpiwg = "http://www.mpiwg.de/ismi/";
+
+	/**
+	 * Creates a person and a place resource, adds an "isPartOf" and an "isBornIn" value
+	 * to the person, writes the graph to a hard-coded file and prints "OK".
+	 *
+	 * @param args not used
+	 */
+	public static void main(String[] args) {
+		Graph graph = SortedMemoryJRDFFactory.getFactory().getGraph();
+		// Obtained but not used below; kept from the triple-based experiments.
+		TripleFactory tripleFactory = graph.getTripleFactory();
+		GraphElementFactory elementFactory = graph.getElementFactory();
+
+		// Predicates
+		URI isPartOf = URI.create(mpiwg + "isPartOf");
+		URI isBornIn = URI.create(mpiwg + "isBornIn");
+
+		// Subjects / objects
+		Resource person = elementFactory.createResource(URI.create(mpiwg + "Person/01"));
+		Resource berlin = elementFactory.createResource(URI.create(mpiwg + "Place/Berlin"));
+
+		person.addValue(isPartOf, person);
+		person.addValue(isBornIn, berlin);
+
+		// Writer.writeNTriples(...) with the same arguments is the N-Triples alternative.
+		File target = new File("/Users/jurzua/Projects/workspace/EDM/ISMI/rdf/rdf.xml");
+		Writer.writeRdfXml(target, graph);
+		System.out.println("OK");
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/readme.txt	Wed Oct 29 13:29:59 2014 +0000
@@ -0,0 +1,83 @@
+mvn clean compile assembly:single
+cp target/openmind-1.0.0-jar-with-dependencies.jar /Users/jurzua/om4.jar 
+mvn install:install-file -Dfile=/Users/jurzua/om4.jar -DgroupId=de.mpiwg.openmind -DartifactId=openmind -Dversion=1.0.0 -Dpackaging=jar
+
+
+
+Scripts
+
+ mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.DoubleRelations" -Dexec.args="SHOW ismi ismipw"
+ mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.DoubleRelations" -Dexec.args="SHOW root e1nste1n"
+ 
+ 
+ mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve"
+ 
+ mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve WITNESS"
+ 
+ -----
+ Problem modification time
+ 
+ test with WITNESS 41578,
+ replace the codex. 
+ 
+ 
+ AbstractPersistenceService
+ línea: 1075
+ entity.addSourceRelation(rel)
+ 
+ Cuando queremos salvar la entidad actual, necesitamos que el método addSourceRelation no tire una excepción si
+ se encuentra con dos relaciones idénticas, porque los datos están malos y necesitamos que se vayan limpiando solos.
+ Solución: necesitamos un nuevo método getEntityContent, el cual no use el método addSourceRelation,
+ para que acepte todo lo que encuentre en la base de datos.
+ 
+ 
+ WITNESS:
+ 41578
+ 
+ CODEX: 
+ 297238
+ 
+ 
+ Conclusions:
+ 
+ for the case of Witness:
+ siempre que haya una lista de relaciones is_part_of, hay que tomar la que tiene el mismo time_modif que su entity (Witness), porque esa es la última modificación.
+ Si hay dos con el mismo time_modif, aún no hay solución pensada.
+ 
+ --------------
+ TODO
+ Text without Author:
+ 52652
+ 
+ Solution:
+ 1. 
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve WITNESS src:is_part_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve WITNESS src:is_exemplar_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve CODEX src:is_part_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve COLLECTION src:is_part_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve REPOSITORY src:is_in"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve CODEX tar:is_part_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve COLLECTION tar:is_part_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve REPOSITORY tar:is_part_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PLACE tar:is_in"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_exemplar_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT src:was_created_by"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_prime_alias_title_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_alias_title_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_translation_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:was_created_by"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON src:lived_in"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:is_reference_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:is_alias_name_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON src:has_role"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:was_copied_by"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve ALIAS src:is_prime_alias_title_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve ALIAS src:is_alias_title_of"
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve ALIAS tar:has_title_written_as"
+ 
+ ------------
+ 
+ Test cases:
+ Text: 101571, just resave
+ translated 4892
+ 
\ No newline at end of file