0
|
1 package de.mpiwg.itgroup.nimanager.importer;
|
|
2
|
|
3 import java.net.URISyntaxException;
|
|
4 import java.net.URL;
|
|
5 import java.sql.Connection;
|
|
6 import java.sql.DriverManager;
|
|
7 import java.sql.ResultSet;
|
|
8 import java.sql.SQLException;
|
|
9 import java.util.Collection;
|
|
10 import java.util.HashMap;
|
|
11 import java.util.List;
|
|
12 import java.util.Map;
|
|
13
|
|
14 import org.apache.log4j.BasicConfigurator;
|
|
15 import org.apache.log4j.Level;
|
|
16 import org.apache.log4j.Logger;
|
|
17 import org.openrdf.model.Resource;
|
|
18 import org.openrdf.model.Statement;
|
|
19 import org.openrdf.model.URI;
|
|
20 import org.openrdf.model.Value;
|
|
21 import org.openrdf.model.impl.LiteralImpl;
|
|
22 import org.openrdf.query.BindingSet;
|
|
23 import org.openrdf.query.MalformedQueryException;
|
|
24 import org.openrdf.query.QueryEvaluationException;
|
|
25 import org.openrdf.query.QueryLanguage;
|
|
26 import org.openrdf.query.TupleQuery;
|
|
27 import org.openrdf.query.TupleQueryResult;
|
|
28 import org.openrdf.repository.RepositoryConnection;
|
|
29 import org.openrdf.repository.RepositoryException;
|
|
30 import org.openrdf.repository.RepositoryResult;
|
|
31
|
|
32 import com.hp.hpl.jena.graph.impl.LiteralLabel;
|
|
33
|
4
|
34 import de.mpiwg.itgroup.triplestoremanager.exceptions.TripleStoreHandlerException;
|
|
35 import de.mpiwg.itgroup.triplestoremanager.owl.MetaDataHandler;
|
|
36 import de.mpiwg.itgroup.triplestoremanager.owl.TripleStoreHandler;
|
|
37
|
|
38
|
0
|
39 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
|
|
40 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
|
|
41 import edu.stanford.smi.protegex.owl.model.RDFProperty;
|
|
42 import edu.stanford.smi.protegex.owl.model.RDFSLiteral;
|
|
43
|
|
44 public class Import {
|
|
45
|
|
46 private MetaDataHandler mh;
|
|
47 private TripleStoreHandler th;
|
|
48 private Logger logger = Logger.getRootLogger();
|
|
49 // private Connection con;
|
|
50 private String offset;
|
|
51 private String limit;
|
|
52
|
|
53 public Import(MetaDataHandler mh, TripleStoreHandler th, String offset,
|
|
54 String limit) throws SQLException, ClassNotFoundException {
|
|
55 this.mh = mh;
|
|
56 this.th = th;
|
|
57 this.offset = offset;
|
|
58 this.limit = limit;
|
|
59
|
|
60 }
|
|
61
|
|
62 private void createMPIWGIdentifiers(String clsName,
|
|
63 HashMap<String, String> mapping, String inCtx, String outCtx)
|
|
64 throws RepositoryException, MalformedQueryException,
|
|
65 QueryEvaluationException, URISyntaxException,
|
|
66 TripleStoreHandlerException, SQLException {
|
|
67
|
|
68 int newPersonID = 0;
|
|
69 int namedEntityIdentifierID= 0;
|
|
70 int namedEntityIdentifierCreationID= 0;
|
|
71
|
|
72 List<String> classes = mh.getEquivalentClasses(clsName); // suche alle
|
|
73 // aequivalenten
|
|
74 // Klassen
|
|
75 classes.add(clsName); // add the classname it self;
|
2
|
76
|
0
|
77 OWLIndividual providerMPIWG = th.getProvider(mh,
|
|
78 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
|
|
79
|
|
80 if (providerMPIWG == null) {
|
|
81 providerMPIWG = createMPIWFProvider(outCtx);
|
|
82 }
|
|
83
|
|
84 OWLIndividual providerDbPedia = th.getProvider(mh,
|
|
85 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
|
|
86
|
|
87 if (providerDbPedia == null) {
|
|
88 providerDbPedia = createDbPediaProvider(outCtx);
|
|
89 }
|
|
90 for (String cl : classes) { // gehe durch die klassen
|
|
91 RepositoryConnection con = th.getRepository().getConnection();
|
2
|
92
|
0
|
93 String queryString = "SELECT DISTINCT ?s FROM <"
|
|
94 + inCtx
|
|
95 + "> WHERE {?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <"
|
|
96 + cl + "> .} OFFSET " + offset + " LIMIT " + limit;
|
2
|
97
|
0
|
98 TupleQueryResult result = th.querySPARQL(queryString);
|
|
99
|
|
100
|
|
101 while (result.hasNext()) {
|
|
102 BindingSet bindingSet = result.next();
|
|
103 Value subjValue = bindingSet.getValue("s");
|
|
104 URI subj = th.getRepository().getValueFactory()
|
|
105 .createURI(subjValue.stringValue());
|
|
106
|
|
107 HashMap<String, Object> newValues = new HashMap<String, Object>();// werte
|
|
108 // die
|
|
109 // neu
|
|
110 // eingetragen
|
|
111 // werden
|
|
112 // muessen
|
|
113 for (String key : mapping.keySet()) {
|
|
114
|
|
115 RepositoryResult<Statement> namesStatements = con
|
|
116 .getStatements(subj, th.getRepository()
|
|
117 .getValueFactory().createURI(key), null,
|
|
118 false);
|
|
119 Statement firstStatement = TripleStoreHandler
|
|
120 .getFirstStatement(namesStatements);
|
|
121
|
|
122 if (firstStatement != null) {
|
|
123 Object newValue;
|
|
124 Value val = firstStatement.getObject();
|
|
125
|
|
126 if (LiteralImpl.class.isInstance(val)) { // wenn ein
|
|
127 // string
|
|
128 // literal,
|
|
129 // dann
|
|
130 // uebersetze
|
|
131 // in jena
|
|
132 // string
|
|
133 // literal
|
|
134 LiteralImpl li = (LiteralImpl) val;
|
|
135
|
|
136 newValue = mh.getOwlModel()
|
|
137 .createRDFSLiteralOrString(li.getLabel(),
|
|
138 li.getLanguage());
|
|
139 } else { // anderfalls dern string wert = uri
|
|
140 newValue = val.stringValue();
|
|
141
|
|
142 }
|
|
143
|
|
144 newValues.put(mapping.get(key), newValue);
|
|
145 }
|
|
146 }
|
|
147
|
|
148 // first create the new person
|
|
149
|
|
150
|
|
151 Boolean ex = checkExistance(newValues,th, mapping.values(),
|
|
152 outCtx);
|
|
153
|
|
154 if (ex) {
|
|
155 logger.info("nothing to be done!");
|
|
156 continue;
|
|
157 }
|
|
158 newPersonID = getNewId(newPersonID, "Person", outCtx);
|
|
159 logger.info("New ID choosen:" + String.valueOf(newPersonID));
|
|
160 OWLIndividual person = mh.generateEntity(
|
|
161 TripleStoreHandler.ONTOLOGY_NS + "Person",
|
|
162 TripleStoreHandler.ONTOLOGY_NS + "Person:"
|
|
163 + String.valueOf(newPersonID), newValues);
|
|
164 mh.printIndividual(person);
|
|
165
|
|
166 newPersonID += 1;
|
|
167 // Boolean ex = checkExistance(person, th, mapping.values(),
|
|
168 // outCtx);
|
|
169 // if (ex) {
|
|
170 // logger.info("nothing to be done!");
|
|
171 // person.delete();
|
|
172 // continue;
|
|
173 // }
|
|
174 th.write(person, outCtx);
|
|
175
|
|
176 // now we create the MPIWG identifier and connect it to the
|
|
177 // person
|
|
178 HashMap<String, Object> idValues = new HashMap<String, Object>();
|
|
179 idValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
180 + "is_preferred_namedEntityIdentifier", person);
|
|
181 idValues.put(
|
|
182 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
|
|
183 "Person:" + String.valueOf(newPersonID)); // TODO PID
|
|
184 // GENERATOR
|
|
185
|
|
186 namedEntityIdentifierID = getNewId(namedEntityIdentifierID, "NamedEntityIdentifier", outCtx);
|
|
187 OWLIndividual mpiwgIdentifier = mh.generateEntity(
|
|
188 TripleStoreHandler.ONTOLOGY_NS
|
|
189 + "NamedEntityIdentifier",
|
|
190 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:"
|
|
191 + String.valueOf(namedEntityIdentifierID),
|
|
192 idValues);
|
|
193
|
|
194 namedEntityIdentifierID+=1;
|
|
195 th.write(mpiwgIdentifier, outCtx);
|
|
196
|
|
197 // now create the creation object and connect it to the MPIWG
|
|
198 // identifier and the provider MPIWG
|
|
199 HashMap<String, Object> creationValues = new HashMap<String, Object>();
|
|
200 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
201 + "carriedOutByNamedEntityProvider", providerMPIWG);
|
|
202 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
203 + "created_NamedEntityIdentifier", mpiwgIdentifier);
|
|
204
|
|
205
|
|
206 namedEntityIdentifierCreationID = getNewId(namedEntityIdentifierCreationID, "NamedEntityIdentifierCreation", outCtx);
|
|
207
|
|
208 OWLIndividual creation = mh.generateEntity(
|
|
209 TripleStoreHandler.ONTOLOGY_NS
|
|
210 + "NamedEntityIdentifierCreation",
|
|
211 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifierCreation:"
|
|
212 + String.valueOf(namedEntityIdentifierCreationID),
|
|
213 creationValues);
|
|
214 namedEntityIdentifierCreationID+=1;
|
|
215
|
|
216 th.write(creation, outCtx);
|
|
217
|
|
218 // now create the wikipedia identifier and connect it to the
|
|
219 // person
|
|
220 HashMap<String, Object> db_idValues = new HashMap<String, Object>();
|
|
221 db_idValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
222 + "identifies_NamedEntity", person);
|
|
223
|
|
224 // identifier is the url at dbpedia
|
|
225 db_idValues
|
|
226 .put("http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
|
|
227 subjValue.stringValue());
|
|
228
|
|
229 namedEntityIdentifierID = getNewId(namedEntityIdentifierID, "NamedEntityIdentifier", outCtx);
|
|
230
|
|
231 OWLIndividual dbIdentifier = mh.generateEntity(
|
|
232 TripleStoreHandler.ONTOLOGY_NS
|
|
233 + "NamedEntityIdentifier",
|
|
234 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:"
|
|
235 + String.valueOf(namedEntityIdentifierID),
|
|
236 db_idValues);
|
|
237
|
|
238 namedEntityIdentifierID+=1;
|
|
239
|
|
240 th.write(dbIdentifier, outCtx);
|
|
241
|
|
242 // now create the creation object and connect it to the dbpedia
|
|
243 // identifier and the provider pdbedia
|
|
244 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
245 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
246 + "carriedOutByNamedEntityProvider", providerDbPedia);
|
|
247 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
248 + "created_NamedEntityIdentifier", dbIdentifier);
|
|
249
|
|
250
|
|
251 namedEntityIdentifierCreationID = getNewId(namedEntityIdentifierCreationID, "NamedEntityIdentifierCreation", outCtx);
|
|
252
|
|
253 OWLIndividual dbcreation = mh.generateEntity(
|
|
254 TripleStoreHandler.ONTOLOGY_NS
|
|
255 + "NamedEntityIdentifierCreation",
|
|
256 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifierCreation:"
|
|
257 + String.valueOf(namedEntityIdentifierCreationID),
|
|
258 db_creationValues);
|
|
259 namedEntityIdentifierCreationID+=1;
|
|
260
|
|
261
|
|
262
|
|
263 th.write(dbcreation, outCtx);
|
|
264
|
|
265 // add the bbpedia identifier to the triple store
|
|
266 th.write(subj.stringValue(),
|
|
267 "http://erlangen-crm.org/110404/P1_is_identified_by",
|
|
268 dbIdentifier.getURI(), outCtx);
|
|
269 ;
|
|
270
|
|
271 }
|
|
272 }
|
|
273
|
|
274 }
|
|
275
|
|
276 private Boolean checkExistance(HashMap<String, Object> newValues,
|
|
277 TripleStoreHandler th2, Collection<String> props, String outCtx) throws RepositoryException {
|
|
278
|
|
279 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
|
|
280 for (String propString : props) {
|
|
281 //RDFProperty rdfProp = model.getRDFProperty(propString);
|
|
282 RDFSLiteral val = (RDFSLiteral) newValues.get(propString);
|
|
283 if (val==null)
|
|
284 continue;
|
|
285 String lang = val.getLanguage();
|
|
286 String str = val.getString();
|
|
287 str = str.replace("\"", "\\\"");
|
|
288 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
|
|
289 + ".";
|
|
290 }
|
|
291
|
|
292 queryString += " }";
|
|
293 TupleQueryResult result;
|
|
294 try {
|
|
295 result = th.querySPARQL(queryString);
|
|
296 } catch (MalformedQueryException e) {
|
|
297 logger.error("Query String cannot be handled:" + queryString);
|
|
298 return false;
|
|
299 } catch (QueryEvaluationException e) {
|
|
300 logger.error("Query String cannot be handled:" + queryString);
|
|
301 return false;
|
|
302 } catch (TripleStoreHandlerException e) {
|
|
303 e.printStackTrace();
|
|
304 logger.error("Query String cannot be handled:" + queryString);
|
|
305 return false;
|
|
306 }
|
|
307 try {
|
|
308 if (result.hasNext())
|
|
309 return true;
|
|
310 else
|
|
311 return false;
|
|
312 } catch (QueryEvaluationException e) {
|
|
313 logger.error("Query String cannot be handled:" + queryString);
|
|
314 return false;
|
|
315 }
|
|
316
|
|
317 }
|
|
318 private int getNewId(int startnumber, String identifier, String ctx)
|
|
319 throws SQLException {
|
|
320
|
|
321 java.sql.Statement smt = th.sqlCon.createStatement();
|
|
322 Boolean exists = true;
|
|
323
|
|
324 while (exists) {
|
|
325 String cmdString = String.format(
|
|
326 "sparql select count(*) from <%s> where {<%s> ?x ?y}",
|
|
327 ctx,
|
2
|
328 TripleStoreHandler.ONTOLOGY_NS + identifier +":"
|
0
|
329 + String.valueOf(startnumber));
|
|
330 smt.execute(cmdString);
|
|
331 ResultSet rs = smt.getResultSet();
|
|
332 rs.next();
|
|
333 int count = rs.getInt(1);
|
|
334 if (count > 0) {
|
|
335 startnumber += 1;
|
|
336 } else {
|
|
337 exists = false;
|
|
338 }
|
|
339 }
|
|
340
|
|
341 return startnumber;
|
|
342 }
|
|
343
|
|
344 private Boolean checkExistance(OWLIndividual person,
|
|
345 TripleStoreHandler th2, Collection<String> props, String outCtx)
|
|
346 throws RepositoryException {
|
|
347 JenaOWLModel model = mh.getOwlModel();
|
|
348 //Map<String, String> vals = new HashMap<String, String>();
|
|
349
|
|
350 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
|
|
351 for (String propString : props) {
|
|
352 RDFProperty rdfProp = model.getRDFProperty(propString);
|
|
353 RDFSLiteral val = (RDFSLiteral) person.getPropertyValue(rdfProp);
|
|
354 if (val == null)
|
|
355 continue;
|
|
356 String lang = val.getLanguage();
|
|
357 String str = val.getString();
|
|
358 str = str.replace("\"", "\\\"");
|
|
359 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
|
|
360 + ".";
|
|
361 }
|
|
362
|
|
363 queryString += " }";
|
|
364 TupleQueryResult result;
|
|
365 try {
|
|
366 result = th.querySPARQL(queryString);
|
|
367 } catch (MalformedQueryException e) {
|
|
368 logger.error("Query String cannot be handled:" + queryString);
|
|
369 return false;
|
|
370 } catch (QueryEvaluationException e) {
|
|
371 logger.error("Query String cannot be handled:" + queryString);
|
|
372 return false;
|
|
373 } catch (TripleStoreHandlerException e) {
|
|
374 logger.error("Query String cannot be handled:" + queryString);
|
|
375 return false;
|
|
376 }
|
|
377 try {
|
|
378 if (result.hasNext())
|
|
379 return true;
|
|
380 else
|
|
381 return false;
|
|
382 } catch (QueryEvaluationException e) {
|
|
383 logger.error("Query String cannot be handled:" + queryString);
|
|
384 return false;
|
|
385 }
|
|
386
|
|
387 }
|
|
388
|
|
389 private OWLIndividual createDbPediaProvider(String ctx)
|
|
390 throws RepositoryException, TripleStoreHandlerException {
|
|
391 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
392
|
|
393 OWLIndividual dbcreation = mh.generateEntity(
|
|
394 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
|
|
395 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
|
|
396
|
|
397 th.write(dbcreation, ctx);
|
|
398
|
|
399 return dbcreation;
|
|
400
|
|
401 }
|
|
402
|
|
403 private OWLIndividual createMPIWFProvider(String ctx)
|
|
404 throws RepositoryException, TripleStoreHandlerException {
|
|
405 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
406
|
|
407 OWLIndividual dbcreation = mh.generateEntity(
|
|
408 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
|
|
409 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
|
|
410
|
|
411 th.write(dbcreation, ctx);
|
|
412
|
|
413 return dbcreation;
|
|
414
|
|
415 }
|
|
416
|
|
417 public static void main(String args[]) throws Exception {
|
1
|
418 if (args.length < 4) {
|
|
419 System.out.println("usage: import user pw offset limit ");
|
0
|
420 System.exit(1);
|
|
421 }
|
|
422
|
|
423 Logger.getRootLogger().setLevel(Level.INFO);
|
|
424 BasicConfigurator.configure();
|
|
425
|
|
426 MetaDataHandler mh = new MetaDataHandler();
|
|
427
|
|
428 TripleStoreHandler th = new TripleStoreHandler(
|
1
|
429 "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111",args[0], args[1]);
|
0
|
430 System.out.println(mh);
|
|
431 System.out.println(th);
|
|
432
|
1
|
433 Import imp = new Import(mh, th, args[2], args[3]);
|
0
|
434
|
|
435 HashMap<String, String> mapping = new HashMap<String, String>();
|
|
436 mapping.put("http://xmlns.com/foaf/0.1/surname",
|
|
437 "http://xmlns.com/foaf/0.1/lastName");
|
|
438 mapping.put("http://xmlns.com/foaf/0.1/givenName",
|
|
439 "http://xmlns.com/foaf/0.1/firstName");
|
|
440 mapping.put("http://xmlns.com/foaf/0.1/name",
|
|
441 "http://xmlns.com/foaf/0.1/name");
|
|
442
|
|
443 imp.createMPIWGIdentifiers("http://dbpedia.org/ontology/Person",
|
|
444 mapping, "file://personendataWikipedia",
|
2
|
445 "file://mpiwg_persons_2.rdf");
|
0
|
446
|
|
447 // mh.getOwlModel().save(new java.net.URI("file:///tmp/prot.owl"));
|
|
448 }
|
|
449
|
|
450 }
|