view docs/RDFGenerator.java @ 86:d4b456623d43

Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Mon, 05 Feb 2018 20:06:38 +0100
parents 1e4835334837
children
line wrap: on
line source

package org.mpi.openmind.scripts;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.jrdf.JRDFFactory;
import org.jrdf.SortedMemoryJRDFFactory;
import org.jrdf.graph.Graph;
import org.jrdf.graph.GraphElementFactory;
import org.jrdf.graph.Resource;
import org.jrdf.writer.RdfWriter;
import org.jrdf.writer.Writer;
import org.jrdf.writer.ntriples.NTriplesWriterImpl;
import org.mpi.openmind.cache.WrapperService;
import org.mpi.openmind.repository.bo.Attribute;
import org.mpi.openmind.repository.bo.Entity;
import org.mpi.openmind.repository.bo.Relation;
import org.mpi.openmind.repository.services.ServiceRegistry;

/**
 * Exports a selection of OpenMind entities as an RDF graph (JRDF) and writes
 * the graph to a file as RDF/XML.
 *
 * Starting from TEXT entities, the generator follows the relation chain
 * WITNESS -(is_part_of)-> CODEX -(is_part_of)-> COLLECTION -(is_part_of)->
 * REPOSITORY -(is_in)-> PLACE and creates one resource per visited entity.
 * Every non-empty attribute of an entity becomes a value on its resource.
 */
public class RDFGenerator {

	/** Service used to load entities and their content. */
	private WrapperService os;
	/** Path of the file the RDF/XML output is written to. */
	private String fileName;
	private JRDFFactory jrdfFactory;
	/** In-memory graph that collects all generated resources. */
	private Graph graph;
	private GraphElementFactory elementFactory;
	
	/** Base namespace prepended to every resource, attribute and relation URI. */
	public String mpiwg = "http://www.mpiwg.de/ismi/";
	
	/** Cache: attribute name -> predicate URI. */
	private Map<String, URI> attURIMap = new HashMap<String, URI>();
	/** Cache: relation name -> predicate URI. */
	private Map<String, URI> relURIMap = new HashMap<String, URI>();
	
	
	/**
	 * Creates a generator backed by a fresh sorted in-memory JRDF graph.
	 *
	 * @param os service used to load entities
	 * @param fileName path of the RDF/XML file written by {@link #execute(long...)}
	 */
	public RDFGenerator(WrapperService os, String fileName){
		this.os = os;
		this.fileName = fileName;
		
		this.jrdfFactory = SortedMemoryJRDFFactory.getFactory();
		this.graph = jrdfFactory.getGraph();
		this.elementFactory = graph.getElementFactory();
	}
	
	/**
	 * Builds the graph for the given TEXT entities and writes it to
	 * {@code fileName} as RDF/XML.
	 *
	 * NOTE: this method terminates the JVM with {@code System.exit(0)} after a
	 * successful write; nothing after a call to it will run. This is the
	 * original behaviour and is kept because callers may rely on it.
	 *
	 * @param texts ids of the TEXT entities to export; when empty, the result
	 *              of {@code getLightweightAssertions("TEXT", null, 100)} is
	 *              used instead (the 100 is presumably a result limit - TODO confirm)
	 * @throws Exception if building or writing the graph fails
	 */
	public void execute(long ... texts) throws Exception{
		List<Entity> textList;
		if(texts.length > 0){
			textList = new ArrayList<Entity>();
			for(long textId : texts){
				textList.add(os.getLightweightEntityById(textId));
			}
		}else{
			textList = os.getLightweightAssertions("TEXT", null, 100);
		}
		
		int count = 0;
		for(Entity text : textList){
			System.out.println(count + ")\t" + text.toString());
			
			// lightweight entities must be loaded before their relations can be followed
			if (text.isLightweight()) {
				text = os.getEntityContent(text);
			}
			
			Resource textResource = createResource(text);
			
			// link every witness of this text to the text resource
			for(Relation rel : text.getTargetRelations("is_exemplar_of", "WITNESS")){
				Entity witness = os.getEntityById(rel.getSourceId());
				Resource witnessResource = createWitnessResource(witness);
				witnessResource.addValue(getRelURI("is_exemplar_of"), textResource);
			}
			System.out.println();
			count++;
		}
		
		long start = System.currentTimeMillis();
		
		// alternative output format: Writer.writeNTriples(new File(fileName), graph)
		Writer.writeRdfXml(new File(fileName), graph);
		
		System.out.println("Time writting [ms]=" + (System.currentTimeMillis() - start));
		System.out.println("OK");
		System.exit(0);
	}
	
	/**
	 * Serialises the graph with {@link NTriplesWriterImpl} and returns the
	 * output as a string.
	 *
	 * Each written byte is appended as one {@code char}, so only single-byte
	 * characters survive unchanged.
	 *
	 * @param graph the graph to serialise
	 * @return the text produced by the N-Triples writer
	 * @throws Exception if writing fails
	 */
	public static String tryWriteNTriple(Graph graph) throws Exception {
		OutputStream output = new OutputStream()
		{
			private StringBuilder string = new StringBuilder();
			
			@Override
			public void write(int b) throws IOException {
				this.string.append((char) b );
			}
			
			@Override
			public String toString(){
				return this.string.toString();
			}
		};
		try {
			final RdfWriter writer = new NTriplesWriterImpl();
			try {
				writer.write(graph, output);
			} finally {
				writer.close();
			}
		}finally {
			output.close();
		}
		return output.toString();
	}
	
	/**
	 * Creates the resource for a WITNESS and links it via "is_part_of" to the
	 * resources of all its CODEX entities.
	 */
	private Resource createWitnessResource(Entity witness) throws Exception{
		System.out.print("W");
		Resource witnessResource = createResource(witness);
		
		for(Relation rel : witness.getSourceRelations("is_part_of", "CODEX")){
			Entity codex = os.getEntityById(rel.getTargetId());
			witnessResource.addValue(getRelURI("is_part_of"), createCodexResource(codex));
		}
		return witnessResource;
	}
	
	/**
	 * Creates the resource for a CODEX and links it via "is_part_of" to the
	 * resources of all its COLLECTION entities.
	 */
	private Resource createCodexResource(Entity codex) throws Exception{
		System.out.print("C");
		Resource codexResource = createResource(codex);
		
		for(Relation rel : codex.getSourceRelations("is_part_of", "COLLECTION")){
			Entity collection = os.getEntityById(rel.getTargetId());
			codexResource.addValue(getRelURI("is_part_of"), createCollectionResource(collection));
		}
		return codexResource;
	}
	
	/**
	 * Creates the resource for a COLLECTION and links it via "is_part_of" to
	 * the resources of all its REPOSITORY entities.
	 */
	private Resource createCollectionResource(Entity collection) throws Exception{
		System.out.print("L");
		Resource collectionResource = createResource(collection);
		
		for(Relation rel : collection.getSourceRelations("is_part_of", "REPOSITORY")){
			Entity repository = os.getEntityById(rel.getTargetId());
			collectionResource.addValue(getRelURI("is_part_of"), createRepositoryResource(repository));
		}
		return collectionResource;
	}
	
	/**
	 * Creates the resource for a REPOSITORY and links it via "is_in" to the
	 * resources of all its PLACE entities.
	 */
	private Resource createRepositoryResource(Entity repository) throws Exception{
		System.out.print("R");
		Resource repositoryResource = createResource(repository);
		
		for(Relation rel : repository.getSourceRelations("is_in", "PLACE")){
			Entity place = os.getEntityById(rel.getTargetId());
			repositoryResource.addValue(getRelURI("is_in"), createResource(place));
			System.out.print("P");
		}
		return repositoryResource;
	}
	
	
	/**
	 * Generates the resource for an entity. The resource URI is
	 * {@code mpiwg + objectClass + "/" + id}; every non-empty attribute is
	 * added as a value, and the object class is added under rdf:type.
	 *
	 * @param entity the entity to convert; its content is loaded first when it is lightweight
	 * @return the resource representing the entity
	 */
	private Resource createResource(Entity entity) throws Exception{
		if (entity.isLightweight()) {
			entity = os.getEntityContent(entity);
		}
		URI entityURI = URI.create(mpiwg + entity.getObjectClass() + "/" + entity.getId());
		Resource resource = elementFactory.createResource(entityURI);
		att2Literals(entity, resource);
		
		// add the type
		resource.addValue(getRDFType(), entity.getObjectClass());
		
		return resource;
	}
	
	private URI rdfType;
	private URI edmType;
	
	/** Returns the rdf:type predicate URI, created on first use. */
	private URI getRDFType(){
		if(rdfType == null){
			rdfType = URI.create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
		}
		return rdfType;
	}
	
	/**
	 * Returns the Europeana edm:type predicate URI, created on first use.
	 * Uses its own field so it cannot overwrite or return the rdf:type URI.
	 */
	private URI getEDMType(){
		if(edmType == null){
			edmType = URI.create("http://www.europeana.eu/schemas/edm/type");
		}
		return edmType;
	}
	
	
	/**
	 * Adds every attribute of the entity that has a non-empty value to the
	 * resource, using the attribute's object class as predicate name.
	 */
	private void att2Literals(Entity entity, Resource resource){
		for(Attribute att : entity.getAttributes()){
			if(StringUtils.isNotEmpty(att.getValue())){
				resource.addValue(getAttURI(att.getObjectClass()), att.getValue());
			}
		}
	}
	
	
	/** Returns the (cached) predicate URI for a relation name. */
	private URI getRelURI(String relName){
		return cachedURI(relURIMap, relName);
	}
	
	/** Returns the (cached) predicate URI for an attribute name. */
	private URI getAttURI(String attName){
		return cachedURI(attURIMap, attName);
	}
	
	/**
	 * Looks the name up in the given cache; on a miss builds
	 * {@code mpiwg + camelCase(name)} and stores it.
	 */
	private URI cachedURI(Map<String, URI> cache, String name){
		URI uri = cache.get(name);
		if(uri == null){
			uri = URI.create(mpiwg + attNameToURIName(name));
			cache.put(name, uri);
		}
		return uri;
	}
	
	/**
	 * Converts an underscore separated name to lower camel case, e.g.
	 * "is_part_of" becomes "isPartOf". Names that do not split into more than
	 * one token are returned unchanged.
	 *
	 * Empty tokens (produced by consecutive underscores, e.g. "is__part") are
	 * skipped; previously they caused a StringIndexOutOfBoundsException.
	 */
	private static String attNameToURIName(String attName){
		String[] words = attName.split("_");
		if(words.length <= 1){
			return attName;
		}
		StringBuilder sb = new StringBuilder(words[0]);
		for(int i = 1; i < words.length; i++){
			String word = words[i];
			if(word.length() == 0){
				continue;
			}
			sb.append(Character.toUpperCase(word.charAt(0)));
			sb.append(word.substring(1));
		}
		return sb.toString();
	}
	
	
	/**
	 * Command line entry point: exports a fixed set of TEXT entities to a
	 * fixed output file. Any exception is printed to stderr.
	 */
	public static void main(String[] args) {
		ServiceRegistry services = new ServiceRegistry();
		RDFGenerator rdfGenerator = 
			new RDFGenerator(
					services.getWrapper(), 
					"/Users/jurzua/Projects/DM2E/Silk/ismi/ismi_data_source.xml");
		try {
			rdfGenerator.execute(415640, 447023, 40979, 458950, 202603);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	
}