annotate docs/RDFGenerator.java @ 33:e52f593f9e0d

new transaction logger "openmind.transactionlog" logging entity save actions and their data. more comments in code.
author casties
date Fri, 26 Aug 2016 11:42:41 +0200
parents 1e4835334837
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
jurzua
parents:
diff changeset
1 package org.mpi.openmind.scripts;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 import java.io.File;
jurzua
parents:
diff changeset
4 import java.io.FileOutputStream;
jurzua
parents:
diff changeset
5 import java.io.IOException;
jurzua
parents:
diff changeset
6 import java.io.OutputStream;
jurzua
parents:
diff changeset
7 import java.net.URI;
jurzua
parents:
diff changeset
8 import java.net.URISyntaxException;
jurzua
parents:
diff changeset
9 import java.util.ArrayList;
jurzua
parents:
diff changeset
10 import java.util.HashMap;
jurzua
parents:
diff changeset
11 import java.util.List;
jurzua
parents:
diff changeset
12 import java.util.Map;
jurzua
parents:
diff changeset
13
jurzua
parents:
diff changeset
14 import org.apache.commons.lang.StringUtils;
jurzua
parents:
diff changeset
15 import org.jrdf.JRDFFactory;
jurzua
parents:
diff changeset
16 import org.jrdf.SortedMemoryJRDFFactory;
jurzua
parents:
diff changeset
17 import org.jrdf.graph.Graph;
jurzua
parents:
diff changeset
18 import org.jrdf.graph.GraphElementFactory;
jurzua
parents:
diff changeset
19 import org.jrdf.graph.Resource;
jurzua
parents:
diff changeset
20 import org.jrdf.writer.RdfWriter;
jurzua
parents:
diff changeset
21 import org.jrdf.writer.Writer;
jurzua
parents:
diff changeset
22 import org.jrdf.writer.ntriples.NTriplesWriterImpl;
jurzua
parents:
diff changeset
23 import org.mpi.openmind.cache.WrapperService;
jurzua
parents:
diff changeset
24 import org.mpi.openmind.repository.bo.Attribute;
jurzua
parents:
diff changeset
25 import org.mpi.openmind.repository.bo.Entity;
jurzua
parents:
diff changeset
26 import org.mpi.openmind.repository.bo.Relation;
jurzua
parents:
diff changeset
27 import org.mpi.openmind.repository.services.ServiceRegistry;
jurzua
parents:
diff changeset
28
jurzua
parents:
diff changeset
29 public class RDFGenerator {
jurzua
parents:
diff changeset
30
jurzua
parents:
diff changeset
31 //private OntologyService os;
jurzua
parents:
diff changeset
32 private WrapperService os;
jurzua
parents:
diff changeset
33 private String fileName;
jurzua
parents:
diff changeset
34 private JRDFFactory jrdfFactory;
jurzua
parents:
diff changeset
35 private Graph graph;
jurzua
parents:
diff changeset
36 private GraphElementFactory elementFactory;
jurzua
parents:
diff changeset
37
jurzua
parents:
diff changeset
38 public String mpiwg = "http://www.mpiwg.de/ismi/";
jurzua
parents:
diff changeset
39
jurzua
parents:
diff changeset
40 private Map<String, URI> attURIMap = new HashMap<String, URI>();
jurzua
parents:
diff changeset
41 private Map<String, URI> relURIMap = new HashMap<String, URI>();
jurzua
parents:
diff changeset
42
jurzua
parents:
diff changeset
43
jurzua
parents:
diff changeset
44 public RDFGenerator(WrapperService os, String fileName){
jurzua
parents:
diff changeset
45 this.os = os;
jurzua
parents:
diff changeset
46 this.fileName = fileName;
jurzua
parents:
diff changeset
47
jurzua
parents:
diff changeset
48 this.jrdfFactory = SortedMemoryJRDFFactory.getFactory();
jurzua
parents:
diff changeset
49 this.graph = jrdfFactory.getGraph();
jurzua
parents:
diff changeset
50 this.elementFactory = graph.getElementFactory();
jurzua
parents:
diff changeset
51 }
jurzua
parents:
diff changeset
52
jurzua
parents:
diff changeset
53 public void execute(long ... texts) throws Exception{
jurzua
parents:
diff changeset
54 List<Entity> textList = null;
jurzua
parents:
diff changeset
55 if(texts.length > 0){
jurzua
parents:
diff changeset
56 textList = new ArrayList<Entity>();
jurzua
parents:
diff changeset
57 for(int i=0; i < texts.length; i++){
jurzua
parents:
diff changeset
58 textList.add(os.getLightweightEntityById(texts[i]));
jurzua
parents:
diff changeset
59 }
jurzua
parents:
diff changeset
60 }else{
jurzua
parents:
diff changeset
61 textList = os.getLightweightAssertions("TEXT", null, 100);
jurzua
parents:
diff changeset
62 }
jurzua
parents:
diff changeset
63
jurzua
parents:
diff changeset
64 int count = 0;
jurzua
parents:
diff changeset
65 for(Entity text : textList){
jurzua
parents:
diff changeset
66 System.out.println(count + ")\t" + text.toString());
jurzua
parents:
diff changeset
67
jurzua
parents:
diff changeset
68 if (text.isLightweight()) {
jurzua
parents:
diff changeset
69 text = os.getEntityContent(text);
jurzua
parents:
diff changeset
70 }
jurzua
parents:
diff changeset
71
jurzua
parents:
diff changeset
72 Resource textResource = createResource(text);
jurzua
parents:
diff changeset
73
jurzua
parents:
diff changeset
74 for(Relation rel : text.getTargetRelations("is_exemplar_of", "WITNESS")){
jurzua
parents:
diff changeset
75 createWitnessResource(
jurzua
parents:
diff changeset
76 os.getEntityById(rel.getSourceId())).
jurzua
parents:
diff changeset
77 addValue(getRelURI("is_exemplar_of"),
jurzua
parents:
diff changeset
78 textResource
jurzua
parents:
diff changeset
79 );
jurzua
parents:
diff changeset
80 }
jurzua
parents:
diff changeset
81 System.out.println();
jurzua
parents:
diff changeset
82 count++;
jurzua
parents:
diff changeset
83 }
jurzua
parents:
diff changeset
84
jurzua
parents:
diff changeset
85 long start = System.currentTimeMillis();
jurzua
parents:
diff changeset
86
jurzua
parents:
diff changeset
87 Writer.writeRdfXml(new File(fileName), graph);
jurzua
parents:
diff changeset
88 //Writer.writeNTriples(new File(fileName), graph);
jurzua
parents:
diff changeset
89 /*
jurzua
parents:
diff changeset
90 try {
jurzua
parents:
diff changeset
91 System.out.println(tryWriteNTriple(graph));
jurzua
parents:
diff changeset
92 } catch (Exception e) {
jurzua
parents:
diff changeset
93 // TODO Auto-generated catch block
jurzua
parents:
diff changeset
94 e.printStackTrace();
jurzua
parents:
diff changeset
95 }*/
jurzua
parents:
diff changeset
96
jurzua
parents:
diff changeset
97 System.out.println("Time writting [ms]=" + (System.currentTimeMillis() - start));
jurzua
parents:
diff changeset
98 System.out.println("OK");
jurzua
parents:
diff changeset
99 System.exit(0);
jurzua
parents:
diff changeset
100 }
jurzua
parents:
diff changeset
101
jurzua
parents:
diff changeset
102 public static String tryWriteNTriple(Graph graph) throws Exception {
jurzua
parents:
diff changeset
103 OutputStream output = new OutputStream()
jurzua
parents:
diff changeset
104 {
jurzua
parents:
diff changeset
105 private StringBuilder string = new StringBuilder();
jurzua
parents:
diff changeset
106 @Override
jurzua
parents:
diff changeset
107 public void write(int b) throws IOException {
jurzua
parents:
diff changeset
108 this.string.append((char) b );
jurzua
parents:
diff changeset
109 }
jurzua
parents:
diff changeset
110
jurzua
parents:
diff changeset
111 //Netbeans IDE automatically overrides this toString()
jurzua
parents:
diff changeset
112 public String toString(){
jurzua
parents:
diff changeset
113 return this.string.toString();
jurzua
parents:
diff changeset
114 }
jurzua
parents:
diff changeset
115 };
jurzua
parents:
diff changeset
116 try {
jurzua
parents:
diff changeset
117 final RdfWriter writer = new NTriplesWriterImpl();
jurzua
parents:
diff changeset
118 try {
jurzua
parents:
diff changeset
119 writer.write(graph, output);
jurzua
parents:
diff changeset
120 } finally {
jurzua
parents:
diff changeset
121 writer.close();
jurzua
parents:
diff changeset
122 }
jurzua
parents:
diff changeset
123 }finally {
jurzua
parents:
diff changeset
124 output.close();
jurzua
parents:
diff changeset
125 }
jurzua
parents:
diff changeset
126 return output.toString();
jurzua
parents:
diff changeset
127 }
jurzua
parents:
diff changeset
128
jurzua
parents:
diff changeset
129 private Resource createWitnessResource(Entity witness) throws Exception{
jurzua
parents:
diff changeset
130 System.out.print("W");
jurzua
parents:
diff changeset
131 Resource witnessResource = createResource(witness);
jurzua
parents:
diff changeset
132
jurzua
parents:
diff changeset
133 for(Relation rel : witness.getSourceRelations("is_part_of", "CODEX")){
jurzua
parents:
diff changeset
134 witnessResource.addValue(getRelURI("is_part_of"), createCodexResource(os.getEntityById(rel.getTargetId())));
jurzua
parents:
diff changeset
135 }
jurzua
parents:
diff changeset
136 return witnessResource;
jurzua
parents:
diff changeset
137 }
jurzua
parents:
diff changeset
138
jurzua
parents:
diff changeset
139 private Resource createCodexResource(Entity codex) throws Exception{
jurzua
parents:
diff changeset
140 System.out.print("C");
jurzua
parents:
diff changeset
141 Resource codexResource = createResource(codex);
jurzua
parents:
diff changeset
142
jurzua
parents:
diff changeset
143 for(Relation rel : codex.getSourceRelations("is_part_of", "COLLECTION")){
jurzua
parents:
diff changeset
144 codexResource.addValue(getRelURI("is_part_of"), createCollectionResource(os.getEntityById(rel.getTargetId())));
jurzua
parents:
diff changeset
145 }
jurzua
parents:
diff changeset
146 return codexResource;
jurzua
parents:
diff changeset
147 }
jurzua
parents:
diff changeset
148
jurzua
parents:
diff changeset
149 private Resource createCollectionResource(Entity collection) throws Exception{
jurzua
parents:
diff changeset
150 System.out.print("L");
jurzua
parents:
diff changeset
151 Resource collectionResource = createResource(collection);
jurzua
parents:
diff changeset
152
jurzua
parents:
diff changeset
153 for(Relation rel : collection.getSourceRelations("is_part_of", "REPOSITORY")){
jurzua
parents:
diff changeset
154 collectionResource.addValue(getRelURI("is_part_of"), createRepositoryResource(os.getEntityById(rel.getTargetId())));
jurzua
parents:
diff changeset
155 }
jurzua
parents:
diff changeset
156 return collectionResource;
jurzua
parents:
diff changeset
157 }
jurzua
parents:
diff changeset
158
jurzua
parents:
diff changeset
159 private Resource createRepositoryResource(Entity repository) throws Exception{
jurzua
parents:
diff changeset
160 System.out.print("R");
jurzua
parents:
diff changeset
161 Resource repositoryResource = createResource(repository);
jurzua
parents:
diff changeset
162
jurzua
parents:
diff changeset
163 for(Relation rel : repository.getSourceRelations("is_in", "PLACE")){
jurzua
parents:
diff changeset
164 repositoryResource.addValue(getRelURI("is_in"), createResource(os.getEntityById(rel.getTargetId())));
jurzua
parents:
diff changeset
165 System.out.print("P");
jurzua
parents:
diff changeset
166 }
jurzua
parents:
diff changeset
167 return repositoryResource;
jurzua
parents:
diff changeset
168 }
jurzua
parents:
diff changeset
169
jurzua
parents:
diff changeset
170
jurzua
parents:
diff changeset
171 /**
jurzua
parents:
diff changeset
172 * generate the resource from an entity and for each attribute will be generated a Literal
jurzua
parents:
diff changeset
173 * @param entity
jurzua
parents:
diff changeset
174 * @return
jurzua
parents:
diff changeset
175 */
jurzua
parents:
diff changeset
176 private Resource createResource(Entity entity) throws Exception{
jurzua
parents:
diff changeset
177 if (entity.isLightweight()) {
jurzua
parents:
diff changeset
178 entity = os.getEntityContent(entity);
jurzua
parents:
diff changeset
179 }
jurzua
parents:
diff changeset
180 URI textURI = URI.create(mpiwg + entity.getObjectClass() + "/" + entity.getId());
jurzua
parents:
diff changeset
181 Resource resource = elementFactory.createResource(textURI);
jurzua
parents:
diff changeset
182 att2Literals(entity, resource);
jurzua
parents:
diff changeset
183
jurzua
parents:
diff changeset
184 //addtype
jurzua
parents:
diff changeset
185 resource.addValue(getRDFType(), entity.getObjectClass());
jurzua
parents:
diff changeset
186 //resource.addValue(getRDFType(), "http://www.europeana.eu/schemas/edm/ProvidedCHO");
jurzua
parents:
diff changeset
187 //resource.addValue(getEDMType(), "TEXT");
jurzua
parents:
diff changeset
188
jurzua
parents:
diff changeset
189 return resource;
jurzua
parents:
diff changeset
190 }
jurzua
parents:
diff changeset
191
jurzua
parents:
diff changeset
192 private URI rdfType;
jurzua
parents:
diff changeset
193 private URI edmType;
jurzua
parents:
diff changeset
194 private URI getRDFType(){
jurzua
parents:
diff changeset
195 if(rdfType == null){
jurzua
parents:
diff changeset
196 try {
jurzua
parents:
diff changeset
197 rdfType = new URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
jurzua
parents:
diff changeset
198 } catch (URISyntaxException e) {
jurzua
parents:
diff changeset
199 e.printStackTrace();
jurzua
parents:
diff changeset
200 }
jurzua
parents:
diff changeset
201 }
jurzua
parents:
diff changeset
202 return rdfType;
jurzua
parents:
diff changeset
203 }
jurzua
parents:
diff changeset
204
jurzua
parents:
diff changeset
205 private URI getEDMType(){
jurzua
parents:
diff changeset
206 if(rdfType == null){
jurzua
parents:
diff changeset
207 try {
jurzua
parents:
diff changeset
208 rdfType = new URI("http://www.europeana.eu/schemas/edm/type");
jurzua
parents:
diff changeset
209 } catch (URISyntaxException e) {
jurzua
parents:
diff changeset
210 e.printStackTrace();
jurzua
parents:
diff changeset
211 }
jurzua
parents:
diff changeset
212 }
jurzua
parents:
diff changeset
213 return rdfType;
jurzua
parents:
diff changeset
214 }
jurzua
parents:
diff changeset
215
jurzua
parents:
diff changeset
216
jurzua
parents:
diff changeset
217
jurzua
parents:
diff changeset
218
jurzua
parents:
diff changeset
219 private void att2Literals(Entity entity, Resource resource){
jurzua
parents:
diff changeset
220 for(Attribute att : entity.getAttributes()){
jurzua
parents:
diff changeset
221 if(StringUtils.isNotEmpty(att.getValue())){
jurzua
parents:
diff changeset
222 resource.addValue(getAttURI(att.getObjectClass()), att.getValue());
jurzua
parents:
diff changeset
223 }
jurzua
parents:
diff changeset
224 }
jurzua
parents:
diff changeset
225 }
jurzua
parents:
diff changeset
226
jurzua
parents:
diff changeset
227
jurzua
parents:
diff changeset
228 private URI getRelURI(String relName){
jurzua
parents:
diff changeset
229 URI uri = relURIMap.get(relName);
jurzua
parents:
diff changeset
230 if(uri == null){
jurzua
parents:
diff changeset
231 String uriName = attNameToURIName(relName);
jurzua
parents:
diff changeset
232 uri = URI.create(mpiwg + uriName);
jurzua
parents:
diff changeset
233 relURIMap.put(relName, uri);
jurzua
parents:
diff changeset
234 }
jurzua
parents:
diff changeset
235 return uri;
jurzua
parents:
diff changeset
236 }
jurzua
parents:
diff changeset
237
jurzua
parents:
diff changeset
238 private URI getAttURI(String attName){
jurzua
parents:
diff changeset
239 URI uri = attURIMap.get(attName);
jurzua
parents:
diff changeset
240 if(uri == null){
jurzua
parents:
diff changeset
241 String uriName = attNameToURIName(attName);
jurzua
parents:
diff changeset
242 uri = URI.create(mpiwg + uriName);
jurzua
parents:
diff changeset
243 attURIMap.put(attName, uri);
jurzua
parents:
diff changeset
244 }
jurzua
parents:
diff changeset
245 return uri;
jurzua
parents:
diff changeset
246 }
jurzua
parents:
diff changeset
247
jurzua
parents:
diff changeset
248 private static String attNameToURIName(String attName){
jurzua
parents:
diff changeset
249 String[] words = attName.split("_");
jurzua
parents:
diff changeset
250 if(words.length > 1){
jurzua
parents:
diff changeset
251 StringBuilder sb = new StringBuilder(words[0]);
jurzua
parents:
diff changeset
252 for(int i = 1; i < words.length; i++){
jurzua
parents:
diff changeset
253 sb.append(Character.toUpperCase(words[i].charAt(0)));
jurzua
parents:
diff changeset
254 sb.append(words[i].substring(1));
jurzua
parents:
diff changeset
255 }
jurzua
parents:
diff changeset
256
jurzua
parents:
diff changeset
257 return sb.toString();
jurzua
parents:
diff changeset
258 }else{
jurzua
parents:
diff changeset
259 return attName;
jurzua
parents:
diff changeset
260 }
jurzua
parents:
diff changeset
261 }
jurzua
parents:
diff changeset
262
jurzua
parents:
diff changeset
263
jurzua
parents:
diff changeset
264
jurzua
parents:
diff changeset
265 public static void main(String[] args) {
jurzua
parents:
diff changeset
266 //System.out.println(attNameToURIName("diagrams_and_illustrations"));
jurzua
parents:
diff changeset
267 //System.out.println(attNameToURIName("hola"));
jurzua
parents:
diff changeset
268 //System.out.println(attNameToURIName("diagrams_and"));
jurzua
parents:
diff changeset
269
jurzua
parents:
diff changeset
270 ServiceRegistry services = new ServiceRegistry();
jurzua
parents:
diff changeset
271 RDFGenerator rdfGenerator =
jurzua
parents:
diff changeset
272 new RDFGenerator(
jurzua
parents:
diff changeset
273 services.getWrapper(),
jurzua
parents:
diff changeset
274 "/Users/jurzua/Projects/DM2E/Silk/ismi/ismi_data_source.xml");
jurzua
parents:
diff changeset
275 //"/Users/jurzua/Projects/workspace/EDM/ISMI/rdf/rdf_text_id_415640.xml");
jurzua
parents:
diff changeset
276 //rdfGenerator.execute(415640);
jurzua
parents:
diff changeset
277 try {
jurzua
parents:
diff changeset
278 rdfGenerator.execute(415640, 447023, 40979, 458950, 202603);
jurzua
parents:
diff changeset
279 } catch (Exception e) {
jurzua
parents:
diff changeset
280 e.printStackTrace();
jurzua
parents:
diff changeset
281 }
jurzua
parents:
diff changeset
282 }
jurzua
parents:
diff changeset
283
jurzua
parents:
diff changeset
284 }