diff docs/RDFGenerator.java @ 3:1e4835334837

(none)
author jurzua
date Wed, 29 Oct 2014 13:29:59 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/RDFGenerator.java	Wed Oct 29 13:29:59 2014 +0000
@@ -0,0 +1,284 @@
+package org.mpi.openmind.scripts;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.jrdf.JRDFFactory;
+import org.jrdf.SortedMemoryJRDFFactory;
+import org.jrdf.graph.Graph;
+import org.jrdf.graph.GraphElementFactory;
+import org.jrdf.graph.Resource;
+import org.jrdf.writer.RdfWriter;
+import org.jrdf.writer.Writer;
+import org.jrdf.writer.ntriples.NTriplesWriterImpl;
+import org.mpi.openmind.cache.WrapperService;
+import org.mpi.openmind.repository.bo.Attribute;
+import org.mpi.openmind.repository.bo.Entity;
+import org.mpi.openmind.repository.bo.Relation;
+import org.mpi.openmind.repository.services.ServiceRegistry;
+
+/**
+ * Exports entities of the OpenMind repository as an RDF graph.
+ * <p>
+ * Starting from TEXT entities, {@link #execute(long...)} visits the WITNESS entities
+ * that are exemplars of each text and then follows WITNESS, CODEX, COLLECTION,
+ * REPOSITORY and PLACE, creates one RDF resource per visited entity and writes the
+ * resulting graph as RDF/XML.
+ */
+public class RDFGenerator {
+
+	/** rdf:type predicate, attached to every generated resource. */
+	private static final URI RDF_TYPE = URI.create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
+
+	/** EDM type predicate; currently not attached to any resource. */
+	private static final URI EDM_TYPE = URI.create("http://www.europeana.eu/schemas/edm/type");
+
+	/** Namespace prepended to every generated resource and predicate URI. */
+	public String mpiwg = "http://www.mpiwg.de/ismi/";
+
+	private final WrapperService os;
+	private final String fileName;
+	private final JRDFFactory jrdfFactory;
+	private final Graph graph;
+	private final GraphElementFactory elementFactory;
+
+	/** Predicate URIs already built for attribute names, keyed by attribute name. */
+	private final Map<String, URI> attURIMap = new HashMap<String, URI>();
+
+	/** Predicate URIs already built for relation names, keyed by relation name. */
+	private final Map<String, URI> relURIMap = new HashMap<String, URI>();
+
+	/**
+	 * Creates a generator whose graph is obtained from a {@link SortedMemoryJRDFFactory}.
+	 *
+	 * @param os service used to load entities and their relations
+	 * @param fileName path of the RDF/XML file written by {@link #execute(long...)}
+	 */
+	public RDFGenerator(WrapperService os, String fileName){
+		this.os = os;
+		this.fileName = fileName;
+		this.jrdfFactory = SortedMemoryJRDFFactory.getFactory();
+		this.graph = jrdfFactory.getGraph();
+		this.elementFactory = graph.getElementFactory();
+	}
+
+	/**
+	 * Builds the graph for a set of TEXT entities and writes it to the output file.
+	 * <p>
+	 * NOTE: the JVM is terminated with {@code System.exit(0)} once the file is written,
+	 * so this method never returns normally.
+	 *
+	 * @param texts ids of the TEXT entities to export; when none are given, TEXT
+	 *        entities are fetched via {@code getLightweightAssertions("TEXT", null, 100)}
+	 *        (100 is presumably a result limit - confirm against WrapperService)
+	 * @throws Exception if an entity cannot be loaded or the graph cannot be written
+	 */
+	public void execute(long ... texts) throws Exception{
+		List<Entity> textList;
+		if(texts.length > 0){
+			textList = new ArrayList<Entity>(texts.length);
+			for(long id : texts){
+				textList.add(os.getLightweightEntityById(id));
+			}
+		}else{
+			textList = os.getLightweightAssertions("TEXT", null, 100);
+		}
+
+		int count = 0;
+		for(Entity text : textList){
+			System.out.println(count + ")\t" + text.toString());
+			if (text.isLightweight()) {
+				text = os.getEntityContent(text);
+			}
+			Resource textResource = createResource(text);
+
+			// Every witness that is an exemplar of this text points back to the text resource.
+			for(Relation rel : text.getTargetRelations("is_exemplar_of", "WITNESS")){
+				Resource witnessResource = createWitnessResource(os.getEntityById(rel.getSourceId()));
+				witnessResource.addValue(getRelURI("is_exemplar_of"), textResource);
+			}
+			System.out.println();
+			count++;
+		}
+
+		long start = System.currentTimeMillis();
+		// RDF/XML is the output format; tryWriteNTriple(graph) renders the same graph as N-Triples.
+		Writer.writeRdfXml(new File(fileName), graph);
+		System.out.println("Time writting [ms]=" + (System.currentTimeMillis() - start));
+		System.out.println("OK");
+		System.exit(0);
+	}
+
+	/**
+	 * Serializes a graph as N-Triples and returns the result as a string.
+	 *
+	 * @param graph the graph to serialize
+	 * @return the serialized triples
+	 * @throws Exception if the writer fails
+	 */
+	public static String tryWriteNTriple(Graph graph) throws Exception {
+		// Buffer the raw bytes and decode them once at the end: casting each byte to a
+		// char on its own corrupts every character outside ASCII.
+		// Fully qualified because the file's import block does not list this class.
+		java.io.ByteArrayOutputStream output = new java.io.ByteArrayOutputStream();
+		final RdfWriter writer = new NTriplesWriterImpl();
+		try {
+			writer.write(graph, output);
+		} finally {
+			writer.close();
+		}
+		// NOTE(review): assumes the writer emits UTF-8 (plain ASCII decodes identically) - confirm.
+		return output.toString("UTF-8");
+	}
+
+	/**
+	 * Creates the resource of a WITNESS and links it via is_part_of to each of its CODEX entities.
+	 *
+	 * @param witness the WITNESS entity
+	 * @return the resource created for the witness
+	 */
+	private Resource createWitnessResource(Entity witness) throws Exception{
+		System.out.print("W");
+		Resource witnessResource = createResource(witness);
+		for(Relation rel : witness.getSourceRelations("is_part_of", "CODEX")){
+			Entity codex = os.getEntityById(rel.getTargetId());
+			witnessResource.addValue(getRelURI("is_part_of"), createCodexResource(codex));
+		}
+		return witnessResource;
+	}
+
+	/**
+	 * Creates the resource of a CODEX and links it via is_part_of to each of its COLLECTION entities.
+	 *
+	 * @param codex the CODEX entity
+	 * @return the resource created for the codex
+	 */
+	private Resource createCodexResource(Entity codex) throws Exception{
+		System.out.print("C");
+		Resource codexResource = createResource(codex);
+		for(Relation rel : codex.getSourceRelations("is_part_of", "COLLECTION")){
+			Entity collection = os.getEntityById(rel.getTargetId());
+			codexResource.addValue(getRelURI("is_part_of"), createCollectionResource(collection));
+		}
+		return codexResource;
+	}
+
+	/**
+	 * Creates the resource of a COLLECTION and links it via is_part_of to each of its REPOSITORY entities.
+	 *
+	 * @param collection the COLLECTION entity
+	 * @return the resource created for the collection
+	 */
+	private Resource createCollectionResource(Entity collection) throws Exception{
+		System.out.print("L");
+		Resource collectionResource = createResource(collection);
+		for(Relation rel : collection.getSourceRelations("is_part_of", "REPOSITORY")){
+			Entity repository = os.getEntityById(rel.getTargetId());
+			collectionResource.addValue(getRelURI("is_part_of"), createRepositoryResource(repository));
+		}
+		return collectionResource;
+	}
+
+	/**
+	 * Creates the resource of a REPOSITORY and links it via is_in to each of its PLACE entities.
+	 *
+	 * @param repository the REPOSITORY entity
+	 * @return the resource created for the repository
+	 */
+	private Resource createRepositoryResource(Entity repository) throws Exception{
+		System.out.print("R");
+		Resource repositoryResource = createResource(repository);
+		for(Relation rel : repository.getSourceRelations("is_in", "PLACE")){
+			Entity place = os.getEntityById(rel.getTargetId());
+			repositoryResource.addValue(getRelURI("is_in"), createResource(place));
+			System.out.print("P");
+		}
+		return repositoryResource;
+	}
+
+	/**
+	 * Creates the RDF resource of an entity: one literal per non-empty attribute plus
+	 * an rdf:type value holding the entity's object class.
+	 *
+	 * @param entity the entity to convert; its content is loaded first when it is lightweight
+	 * @return the resource identified by {@code mpiwg + objectClass + "/" + id}
+	 */
+	private Resource createResource(Entity entity) throws Exception{
+		if (entity.isLightweight()) {
+			entity = os.getEntityContent(entity);
+		}
+		URI entityURI = URI.create(mpiwg + entity.getObjectClass() + "/" + entity.getId());
+		Resource resource = elementFactory.createResource(entityURI);
+		att2Literals(entity, resource);
+		resource.addValue(getRDFType(), entity.getObjectClass());
+		return resource;
+	}
+
+	/** Returns the rdf:type predicate URI. */
+	private URI getRDFType(){
+		return RDF_TYPE;
+	}
+
+	/**
+	 * Returns the EDM type predicate URI. It is backed by its own constant so that it
+	 * can never be confused with, or overwrite, the rdf:type predicate.
+	 */
+	private URI getEDMType(){
+		return EDM_TYPE;
+	}
+
+	/** Adds one literal to the resource for every attribute of the entity that has a non-empty value. */
+	private void att2Literals(Entity entity, Resource resource){
+		for(Attribute att : entity.getAttributes()){
+			if(StringUtils.isNotEmpty(att.getValue())){
+				resource.addValue(getAttURI(att.getObjectClass()), att.getValue());
+			}
+		}
+	}
+
+	/** Returns the predicate URI of a relation name, building and caching it on first use. */
+	private URI getRelURI(String relName){
+		return cachedURI(relURIMap, relName);
+	}
+
+	/** Returns the predicate URI of an attribute name, building and caching it on first use. */
+	private URI getAttURI(String attName){
+		return cachedURI(attURIMap, attName);
+	}
+
+	/** Looks a name up in the given cache; on a miss builds {@code mpiwg + camelCaseName} and stores it. */
+	private URI cachedURI(Map<String, URI> cache, String name){
+		URI uri = cache.get(name);
+		if(uri == null){
+			uri = URI.create(mpiwg + attNameToURIName(name));
+			cache.put(name, uri);
+		}
+		return uri;
+	}
+
+	/**
+	 * Converts a snake_case name to lowerCamelCase, e.g. "is_part_of" becomes "isPartOf".
+	 * A name that does not split into at least two words is returned unchanged.
+	 *
+	 * @param attName attribute or relation name, words separated by underscores
+	 * @return the name without underscores, every word after the first capitalized
+	 */
+	private static String attNameToURIName(String attName){
+		String[] words = attName.split("_");
+		if(words.length <= 1){
+			return attName;
+		}
+		StringBuilder sb = new StringBuilder(words[0]);
+		for(int i = 1; i < words.length; i++){
+			// Consecutive underscores ("a__b") yield empty words; skip them instead of
+			// failing on charAt(0).
+			if(words[i].length() == 0){
+				continue;
+			}
+			sb.append(Character.toUpperCase(words[i].charAt(0)));
+			sb.append(words[i].substring(1));
+		}
+		return sb.toString();
+	}
+
+	/**
+	 * Exports a fixed set of TEXT entities to a hard-coded output file.
+	 *
+	 * @param args ignored
+	 */
+	public static void main(String[] args) {
+		ServiceRegistry services = new ServiceRegistry();
+		RDFGenerator rdfGenerator =
+			new RDFGenerator(
+					services.getWrapper(),
+					"/Users/jurzua/Projects/DM2E/Silk/ismi/ismi_data_source.xml");
+		try {
+			rdfGenerator.execute(415640, 447023, 40979, 458950, 202603);
+		} catch (Exception e) {
+			e.printStackTrace();
+		}
+	}
+
+}