Mercurial > hg > openmind
view docs/RDFGenerator.java @ 86:d4b456623d43
Updated XML export. Saves relation source-type and target-type. Expanded statistics with per-entity-type relation counts.
author | Robert Casties <casties@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 05 Feb 2018 20:06:38 +0100 |
parents | 1e4835334837 |
children |
line wrap: on
line source
package org.mpi.openmind.scripts; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.jrdf.JRDFFactory; import org.jrdf.SortedMemoryJRDFFactory; import org.jrdf.graph.Graph; import org.jrdf.graph.GraphElementFactory; import org.jrdf.graph.Resource; import org.jrdf.writer.RdfWriter; import org.jrdf.writer.Writer; import org.jrdf.writer.ntriples.NTriplesWriterImpl; import org.mpi.openmind.cache.WrapperService; import org.mpi.openmind.repository.bo.Attribute; import org.mpi.openmind.repository.bo.Entity; import org.mpi.openmind.repository.bo.Relation; import org.mpi.openmind.repository.services.ServiceRegistry; public class RDFGenerator { //private OntologyService os; private WrapperService os; private String fileName; private JRDFFactory jrdfFactory; private Graph graph; private GraphElementFactory elementFactory; public String mpiwg = "http://www.mpiwg.de/ismi/"; private Map<String, URI> attURIMap = new HashMap<String, URI>(); private Map<String, URI> relURIMap = new HashMap<String, URI>(); public RDFGenerator(WrapperService os, String fileName){ this.os = os; this.fileName = fileName; this.jrdfFactory = SortedMemoryJRDFFactory.getFactory(); this.graph = jrdfFactory.getGraph(); this.elementFactory = graph.getElementFactory(); } public void execute(long ... texts) throws Exception{ List<Entity> textList = null; if(texts.length > 0){ textList = new ArrayList<Entity>(); for(int i=0; i < texts.length; i++){ textList.add(os.getLightweightEntityById(texts[i])); } }else{ textList = os.getLightweightAssertions("TEXT", null, 100); } int count = 0; for(Entity text : textList){ System.out.println(count + ")\t" + text.toString()); if (text.isLightweight()) { text = os.getEntityContent(text); } Resource textResource = createResource(text); for(Relation rel : text.getTargetRelations("is_exemplar_of", "WITNESS")){ createWitnessResource( os.getEntityById(rel.getSourceId())). addValue(getRelURI("is_exemplar_of"), textResource ); } System.out.println(); count++; } long start = System.currentTimeMillis(); Writer.writeRdfXml(new File(fileName), graph); //Writer.writeNTriples(new File(fileName), graph); /* try { System.out.println(tryWriteNTriple(graph)); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); }*/ System.out.println("Time writting [ms]=" + (System.currentTimeMillis() - start)); System.out.println("OK"); System.exit(0); } public static String tryWriteNTriple(Graph graph) throws Exception { OutputStream output = new OutputStream() { private StringBuilder string = new StringBuilder(); @Override public void write(int b) throws IOException { this.string.append((char) b ); } //Netbeans IDE automatically overrides this toString() public String toString(){ return this.string.toString(); } }; try { final RdfWriter writer = new NTriplesWriterImpl(); try { writer.write(graph, output); } finally { writer.close(); } }finally { output.close(); } return output.toString(); } private Resource createWitnessResource(Entity witness) throws Exception{ System.out.print("W"); Resource witnessResource = createResource(witness); for(Relation rel : witness.getSourceRelations("is_part_of", "CODEX")){ witnessResource.addValue(getRelURI("is_part_of"), createCodexResource(os.getEntityById(rel.getTargetId()))); } return witnessResource; } private Resource createCodexResource(Entity codex) throws Exception{ System.out.print("C"); Resource codexResource = createResource(codex); for(Relation rel : codex.getSourceRelations("is_part_of", "COLLECTION")){ codexResource.addValue(getRelURI("is_part_of"), createCollectionResource(os.getEntityById(rel.getTargetId()))); } return codexResource; } private Resource createCollectionResource(Entity collection) throws Exception{ System.out.print("L"); Resource collectionResource = createResource(collection); for(Relation rel : collection.getSourceRelations("is_part_of", "REPOSITORY")){ collectionResource.addValue(getRelURI("is_part_of"), createRepositoryResource(os.getEntityById(rel.getTargetId()))); } return collectionResource; } private Resource createRepositoryResource(Entity repository) throws Exception{ System.out.print("R"); Resource repositoryResource = createResource(repository); for(Relation rel : repository.getSourceRelations("is_in", "PLACE")){ repositoryResource.addValue(getRelURI("is_in"), createResource(os.getEntityById(rel.getTargetId()))); System.out.print("P"); } return repositoryResource; } /** * generate the resource from an entity and for each attribute will be generated a Literal * @param entity * @return */ private Resource createResource(Entity entity) throws Exception{ if (entity.isLightweight()) { entity = os.getEntityContent(entity); } URI textURI = URI.create(mpiwg + entity.getObjectClass() + "/" + entity.getId()); Resource resource = elementFactory.createResource(textURI); att2Literals(entity, resource); //addtype resource.addValue(getRDFType(), entity.getObjectClass()); //resource.addValue(getRDFType(), "http://www.europeana.eu/schemas/edm/ProvidedCHO"); //resource.addValue(getEDMType(), "TEXT"); return resource; } private URI rdfType; private URI edmType; private URI getRDFType(){ if(rdfType == null){ try { rdfType = new URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"); } catch (URISyntaxException e) { e.printStackTrace(); } } return rdfType; } private URI getEDMType(){ if(rdfType == null){ try { rdfType = new URI("http://www.europeana.eu/schemas/edm/type"); } catch (URISyntaxException e) { e.printStackTrace(); } } return rdfType; } private void att2Literals(Entity entity, Resource resource){ for(Attribute att : entity.getAttributes()){ if(StringUtils.isNotEmpty(att.getValue())){ resource.addValue(getAttURI(att.getObjectClass()), att.getValue()); } } } private URI getRelURI(String relName){ URI uri = relURIMap.get(relName); if(uri == null){ String uriName = attNameToURIName(relName); uri = URI.create(mpiwg + uriName); relURIMap.put(relName, uri); } return uri; } private URI getAttURI(String attName){ URI uri = attURIMap.get(attName); if(uri == null){ String uriName = attNameToURIName(attName); uri = URI.create(mpiwg + uriName); attURIMap.put(attName, uri); } return uri; } private static String attNameToURIName(String attName){ String[] words = attName.split("_"); if(words.length > 1){ StringBuilder sb = new StringBuilder(words[0]); for(int i = 1; i < words.length; i++){ sb.append(Character.toUpperCase(words[i].charAt(0))); sb.append(words[i].substring(1)); } return sb.toString(); }else{ return attName; } } public static void main(String[] args) { //System.out.println(attNameToURIName("diagrams_and_illustrations")); //System.out.println(attNameToURIName("hola")); //System.out.println(attNameToURIName("diagrams_and")); ServiceRegistry services = new ServiceRegistry(); RDFGenerator rdfGenerator = new RDFGenerator( services.getWrapper(), "/Users/jurzua/Projects/DM2E/Silk/ismi/ismi_data_source.xml"); //"/Users/jurzua/Projects/workspace/EDM/ISMI/rdf/rdf_text_id_415640.xml"); //rdfGenerator.execute(415640); try { rdfGenerator.execute(415640, 447023, 40979, 458950, 202603); } catch (Exception e) { e.printStackTrace(); } } }