Mercurial > hg > openmind
changeset 3:1e4835334837
(none)
author | jurzua |
---|---|
date | Wed, 29 Oct 2014 13:29:59 +0000 |
parents | 0e0082e1e12f |
children | 8ce07918ec8a |
files | docs/RDFGenerator.java docs/RDFTest.java docs/readme.txt |
diffstat | 3 files changed, 439 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
// Originally added as docs/RDFGenerator.java (new file, Wed Oct 29 13:29:59 2014 +0000).
package org.mpi.openmind.scripts;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.jrdf.JRDFFactory;
import org.jrdf.SortedMemoryJRDFFactory;
import org.jrdf.graph.Graph;
import org.jrdf.graph.GraphElementFactory;
import org.jrdf.graph.Resource;
import org.jrdf.writer.RdfWriter;
import org.jrdf.writer.Writer;
import org.jrdf.writer.ntriples.NTriplesWriterImpl;
import org.mpi.openmind.cache.WrapperService;
import org.mpi.openmind.repository.bo.Attribute;
import org.mpi.openmind.repository.bo.Entity;
import org.mpi.openmind.repository.bo.Relation;
import org.mpi.openmind.repository.services.ServiceRegistry;

/**
 * Script that exports a set of TEXT entities, together with the entities reachable
 * through the chain WITNESS -> CODEX -> COLLECTION -> REPOSITORY -> PLACE, into an
 * in-memory JRDF graph and writes that graph as RDF/XML to a file.
 *
 * <p>Every entity becomes one resource whose URI is
 * {@code mpiwg + objectClass + "/" + id}; every non-empty attribute becomes a value
 * on that resource.
 */
public class RDFGenerator {

    /** Predicate URI used for the type value added to every resource. */
    private static final URI RDF_TYPE =
            URI.create("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");

    /** Europeana EDM type predicate; currently only reachable through {@link #getEDMType()}. */
    private static final URI EDM_TYPE =
            URI.create("http://www.europeana.eu/schemas/edm/type");

    private WrapperService os;
    private String fileName;
    private JRDFFactory jrdfFactory;
    private Graph graph;
    private GraphElementFactory elementFactory;

    /** Base namespace prepended to every resource, attribute and relation URI. */
    public String mpiwg = "http://www.mpiwg.de/ismi/";

    /** Cache of attribute name -> predicate URI. */
    private Map<String, URI> attURIMap = new HashMap<String, URI>();
    /** Cache of relation name -> predicate URI. */
    private Map<String, URI> relURIMap = new HashMap<String, URI>();

    /**
     * @param os       service used to load entities and their content
     * @param fileName path of the RDF/XML file written by {@link #execute(long...)}
     */
    public RDFGenerator(WrapperService os, String fileName) {
        this.os = os;
        this.fileName = fileName;

        this.jrdfFactory = SortedMemoryJRDFFactory.getFactory();
        this.graph = jrdfFactory.getGraph();
        this.elementFactory = graph.getElementFactory();
    }

    /**
     * Builds the graph for the given texts and writes it to {@code fileName} as RDF/XML.
     *
     * <p>NOTE: this method ends with {@code System.exit(0)}, so it never returns
     * normally to its caller.
     *
     * @param texts ids of the TEXT entities to export; when empty, the result of
     *              {@code getLightweightAssertions("TEXT", null, 100)} is exported
     *              instead (presumably the first 100 texts -- confirm against
     *              WrapperService)
     * @throws Exception if loading an entity, building the graph or writing the file fails
     */
    public void execute(long... texts) throws Exception {
        List<Entity> textList;
        if (texts.length > 0) {
            textList = new ArrayList<Entity>();
            for (long textId : texts) {
                textList.add(os.getLightweightEntityById(textId));
            }
        } else {
            textList = os.getLightweightAssertions("TEXT", null, 100);
        }

        int count = 0;
        for (Entity text : textList) {
            System.out.println(count + ")\t" + text.toString());

            // Relations are only available on fully loaded entities.
            if (text.isLightweight()) {
                text = os.getEntityContent(text);
            }

            Resource textResource = createResource(text);

            for (Relation rel : text.getTargetRelations("is_exemplar_of", "WITNESS")) {
                Resource witnessResource =
                        createWitnessResource(os.getEntityById(rel.getSourceId()));
                witnessResource.addValue(getRelURI("is_exemplar_of"), textResource);
            }
            System.out.println();
            count++;
        }

        long start = System.currentTimeMillis();

        // RDF/XML is the active output format; Writer.writeNTriples(new File(fileName), graph)
        // and tryWriteNTriple(graph) were the alternatives tried in earlier revisions.
        Writer.writeRdfXml(new File(fileName), graph);

        System.out.println("Time writting [ms]=" + (System.currentTimeMillis() - start));
        System.out.println("OK");
        System.exit(0);
    }

    /**
     * Serializes the graph with {@link NTriplesWriterImpl} into a string.
     *
     * @param graph graph to serialize
     * @return everything the writer emitted, one character per written byte
     * @throws Exception if the writer fails
     */
    public static String tryWriteNTriple(Graph graph) throws Exception {
        OutputStream output = new OutputStream() {
            private StringBuilder string = new StringBuilder();

            @Override
            public void write(int b) throws IOException {
                // Only the low 8 bits carry the byte. The inherited
                // write(byte[], int, int) passes sign-extended values, so without
                // the mask every byte >= 0x80 would turn into a 0xFFxx character.
                this.string.append((char) (b & 0xFF));
            }

            @Override
            public String toString() {
                return this.string.toString();
            }
        };
        try {
            final RdfWriter writer = new NTriplesWriterImpl();
            try {
                writer.write(graph, output);
            } finally {
                writer.close();
            }
        } finally {
            output.close();
        }
        return output.toString();
    }

    /**
     * Creates the resource for a witness and links it via {@code is_part_of}
     * to the resource of every CODEX it points to.
     */
    private Resource createWitnessResource(Entity witness) throws Exception {
        System.out.print("W");
        Resource witnessResource = createResource(witness);

        for (Relation rel : witness.getSourceRelations("is_part_of", "CODEX")) {
            witnessResource.addValue(
                    getRelURI("is_part_of"),
                    createCodexResource(os.getEntityById(rel.getTargetId())));
        }
        return witnessResource;
    }

    /**
     * Creates the resource for a codex and links it via {@code is_part_of}
     * to the resource of every COLLECTION it points to.
     */
    private Resource createCodexResource(Entity codex) throws Exception {
        System.out.print("C");
        Resource codexResource = createResource(codex);

        for (Relation rel : codex.getSourceRelations("is_part_of", "COLLECTION")) {
            codexResource.addValue(
                    getRelURI("is_part_of"),
                    createCollectionResource(os.getEntityById(rel.getTargetId())));
        }
        return codexResource;
    }

    /**
     * Creates the resource for a collection and links it via {@code is_part_of}
     * to the resource of every REPOSITORY it points to.
     */
    private Resource createCollectionResource(Entity collection) throws Exception {
        System.out.print("L");
        Resource collectionResource = createResource(collection);

        for (Relation rel : collection.getSourceRelations("is_part_of", "REPOSITORY")) {
            collectionResource.addValue(
                    getRelURI("is_part_of"),
                    createRepositoryResource(os.getEntityById(rel.getTargetId())));
        }
        return collectionResource;
    }

    /**
     * Creates the resource for a repository and links it via {@code is_in}
     * to the resource of every PLACE it points to.
     */
    private Resource createRepositoryResource(Entity repository) throws Exception {
        System.out.print("R");
        Resource repositoryResource = createResource(repository);

        for (Relation rel : repository.getSourceRelations("is_in", "PLACE")) {
            repositoryResource.addValue(
                    getRelURI("is_in"),
                    createResource(os.getEntityById(rel.getTargetId())));
            System.out.print("P");
        }
        return repositoryResource;
    }

    /**
     * Generates the resource for an entity: one value per non-empty attribute plus
     * a type value holding the entity's object class.
     *
     * @param entity entity to convert; its content is loaded first when it is lightweight
     * @return the resource created in the graph for this entity
     */
    private Resource createResource(Entity entity) throws Exception {
        if (entity.isLightweight()) {
            entity = os.getEntityContent(entity);
        }
        URI entityURI = URI.create(mpiwg + entity.getObjectClass() + "/" + entity.getId());
        Resource resource = elementFactory.createResource(entityURI);
        att2Literals(entity, resource);

        // The type is the plain object class name. Earlier experiments used
        // "http://www.europeana.eu/schemas/edm/ProvidedCHO" and an EDM type of "TEXT".
        resource.addValue(getRDFType(), entity.getObjectClass());

        return resource;
    }

    /** @return the {@code rdf:type} predicate URI */
    private URI getRDFType() {
        return RDF_TYPE;
    }

    /**
     * @return the EDM type predicate URI. The previous implementation read and wrote
     *         the rdf:type field here, so it could return the rdf:type URI or
     *         overwrite it with the EDM one.
     */
    private URI getEDMType() {
        return EDM_TYPE;
    }

    /** Adds every non-empty attribute of the entity as a value on the resource. */
    private void att2Literals(Entity entity, Resource resource) {
        for (Attribute att : entity.getAttributes()) {
            if (StringUtils.isNotEmpty(att.getValue())) {
                resource.addValue(getAttURI(att.getObjectClass()), att.getValue());
            }
        }
    }

    /** @return the cached predicate URI for a relation name such as {@code is_part_of} */
    private URI getRelURI(String relName) {
        return cachedURI(relURIMap, relName);
    }

    /** @return the cached predicate URI for an attribute name */
    private URI getAttURI(String attName) {
        return cachedURI(attURIMap, attName);
    }

    /**
     * Looks the name up in the given cache, creating and storing
     * {@code mpiwg + camelCaseName} on the first request.
     */
    private URI cachedURI(Map<String, URI> cache, String name) {
        URI uri = cache.get(name);
        if (uri == null) {
            uri = URI.create(mpiwg + attNameToURIName(name));
            cache.put(name, uri);
        }
        return uri;
    }

    /**
     * Converts an underscore separated name to lower camel case, e.g.
     * {@code diagrams_and_illustrations} becomes {@code diagramsAndIllustrations}.
     * Names that do not split into more than one segment are returned unchanged.
     */
    private static String attNameToURIName(String attName) {
        String[] words = attName.split("_");
        if (words.length <= 1) {
            return attName;
        }
        StringBuilder sb = new StringBuilder(words[0]);
        for (int i = 1; i < words.length; i++) {
            String word = words[i];
            // Consecutive underscores ("a__b") yield an empty segment; skip it
            // instead of failing on charAt(0).
            if (word.length() == 0) {
                continue;
            }
            sb.append(Character.toUpperCase(word.charAt(0)));
            sb.append(word.substring(1));
        }
        return sb.toString();
    }

    /**
     * Exports a fixed list of text ids to a fixed, developer-local output file.
     * An earlier run targeted
     * /Users/jurzua/Projects/workspace/EDM/ISMI/rdf/rdf_text_id_415640.xml with the
     * single id 415640.
     */
    public static void main(String[] args) {
        ServiceRegistry services = new ServiceRegistry();
        RDFGenerator rdfGenerator =
                new RDFGenerator(
                        services.getWrapper(),
                        "/Users/jurzua/Projects/DM2E/Silk/ismi/ismi_data_source.xml");
        try {
            rdfGenerator.execute(415640, 447023, 40979, 458950, 202603);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
// Originally added as docs/RDFTest.java (new file, Wed Oct 29 13:29:59 2014 +0000).
package org.mpi.openmind.rdf;

import java.io.File;
import java.net.URI;

import org.jrdf.JRDFFactory;
import org.jrdf.SortedMemoryJRDFFactory;
import org.jrdf.graph.AnyTriple;
import org.jrdf.graph.Graph;
import org.jrdf.graph.GraphElementFactory;
import org.jrdf.graph.Resource;
import org.jrdf.graph.Triple;
import org.jrdf.graph.TripleFactory;
import org.jrdf.parser.RdfReader;
import org.jrdf.util.ClosableIterator;
import org.jrdf.writer.RdfWriter;
import org.jrdf.writer.Writer;
import org.springframework.web.servlet.view.RedirectView;

/**
 * Minimal JRDF smoke test: builds a two-resource graph in memory and writes it
 * out as RDF/XML to a fixed, developer-local path.
 */
public class RDFTest {

    /** Base namespace for every URI created by this test. */
    public static String mpiwg = "http://www.mpiwg.de/ismi/";

    /**
     * Creates a person and a place resource, attaches two values to the person
     * and serializes the resulting graph.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // In-memory graph. An earlier variant parsed /Users/jurzua/test001.nt
        // through RdfReader.parseNTriples instead.
        JRDFFactory factory = SortedMemoryJRDFFactory.getFactory();
        Graph rdfGraph = factory.getGraph();

        // The triple factory is still obtained although nothing uses it now;
        // a removed experiment added urn:foo / urn:bar triples through it.
        TripleFactory triples = rdfGraph.getTripleFactory();
        GraphElementFactory nodes = rdfGraph.getElementFactory();

        // Predicates.
        URI partOfPredicate = URI.create(mpiwg + "isPartOf");
        URI bornInPredicate = URI.create(mpiwg + "isBornIn");

        // Subjects / objects.
        Resource personNode = nodes.createResource(URI.create(mpiwg + "Person/01"));
        Resource berlinNode = nodes.createResource(URI.create(mpiwg + "Place/Berlin"));

        // The person is (deliberately, as in the original test) part of itself.
        personNode.addValue(partOfPredicate, personNode);
        personNode.addValue(bornInPredicate, berlinNode);

        // Writer.writeNTriples on the same path was the alternative output format.
        File target = new File("/Users/jurzua/Projects/workspace/EDM/ISMI/rdf/rdf.xml");
        Writer.writeRdfXml(target, rdfGraph);
        System.out.println("OK");
    }

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/readme.txt Wed Oct 29 13:29:59 2014 +0000 @@ -0,0 +1,83 @@ +mvn clean compile assembly:single +cp target/openmind-1.0.0-jar-with-dependencies.jar /Users/jurzua/om4.jar +mvn install:install-file -Dfile=/Users/jurzua/om4.jar -DgroupId=de.mpiwg.openmind -DartifactId=openmind -Dversion=1.0.0 -Dpackaging=jar + + + +Scripts + + mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.DoubleRelations" -Dexec.args="SHOW ismi ismipw" + mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.DoubleRelations" -Dexec.args="SHOW root e1nste1n" + + + mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve" + + mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve WITNESS" + + ----- + Problem modification time + + test with WITNESS 41578, + replace the codex. + + + AbstractPersistenceService + línea: 1075 + entity.addSourceRelation(rel) + + Cuando queremos salvar la entidad actual, necesitamos que el método addSourceRelation no tire una excepción si + se encuentra con dos relaciones idénticas. Porque los datos están malos, y necesitamos que se vayan limpiando solos. + Solución: Necesitamos un nuevo método getEntityContent, el cual no use el método addSourceRelation, + para que acepte todo lo que encuentre en la base de datos. + + + WITNESS: + 41578 + + CODEX: + 297238 + + + Conclusions: + + for the case of Witness: + siempre que haya una lista de las relaciones is_part_of, hay que tomar la que tiene el mismo time_modif de su entity (Witness), porque esa es la última modificación. + Si es que hay dos con el mismo time_modif, aún no hay solución pensada. + + -------------- + TODO + Text without Author: + 52652 + + Solution: + 1. 
+mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve WITNESS src:is_part_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve WITNESS src:is_exemplar_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve CODEX src:is_part_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve COLLECTION src:is_part_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve REPOSITORY src:is_in" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve CODEX tar:is_part_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve COLLECTION tar:is_part_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve REPOSITORY tar:is_part_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PLACE tar:is_in" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_exemplar_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT src:was_created_by" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_prime_alias_title_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_alias_title_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve TEXT tar:is_translation_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:was_created_by" +mvn 
exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON src:lived_in" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:is_reference_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:is_alias_name_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON src:has_role" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve PERSON tar:was_copied_by" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve ALIAS src:is_prime_alias_title_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve ALIAS src:is_alias_title_of" +mvn exec:java -Dexec.mainClass="org.mpi.openmind.scripts.TimeModification" -Dexec.args="ismi ismipw solve ALIAS tar:has_title_written_as" + + ------------ + + Test cases: + Text: 101571, just resave + translated 4892 + \ No newline at end of file