2
|
1 package de.mpiwg.itgroup.nimanager.importer;
|
|
2
|
|
3 import java.awt.dnd.DnDConstants;
|
|
4 import java.io.FileWriter;
|
|
5 import java.io.IOException;
|
|
6 import java.io.PrintStream;
|
|
7 import java.net.URISyntaxException;
|
|
8 import java.net.URL;
|
|
9 import java.sql.Connection;
|
|
10 import java.sql.DriverManager;
|
|
11 import java.sql.ResultSet;
|
|
12 import java.sql.SQLException;
|
|
13 import java.util.Collection;
|
|
14 import java.util.HashMap;
|
|
15 import java.util.List;
|
|
16 import java.util.Map;
|
|
17
|
|
18 import org.apache.log4j.BasicConfigurator;
|
|
19 import org.apache.log4j.Level;
|
|
20 import org.apache.log4j.Logger;
|
|
21 import org.openrdf.model.Resource;
|
|
22 import org.openrdf.model.Statement;
|
|
23 import org.openrdf.model.URI;
|
|
24 import org.openrdf.model.Value;
|
|
25 import org.openrdf.model.impl.LiteralImpl;
|
|
26 import org.openrdf.query.Binding;
|
|
27 import org.openrdf.query.BindingSet;
|
|
28 import org.openrdf.query.MalformedQueryException;
|
|
29 import org.openrdf.query.Query;
|
|
30 import org.openrdf.query.QueryEvaluationException;
|
|
31 import org.openrdf.query.QueryLanguage;
|
|
32 import org.openrdf.query.TupleQuery;
|
|
33 import org.openrdf.query.TupleQueryResult;
|
|
34 import org.openrdf.repository.RepositoryConnection;
|
|
35 import org.openrdf.repository.RepositoryException;
|
|
36 import org.openrdf.repository.RepositoryResult;
|
|
37
|
|
38 import com.hp.hpl.jena.graph.impl.LiteralLabel;
|
|
39
|
|
40 import de.mpiwg.itgroup.nimanager.exceptions.TripleStoreHandlerException;
|
|
41 import de.mpiwg.itgroup.nimanager.owl.MetaDataHandler;
|
|
42 import de.mpiwg.itgroup.nimanager.owl.TripleStoreHandler;
|
|
43 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
|
|
44 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
|
|
45 import edu.stanford.smi.protegex.owl.model.RDFProperty;
|
|
46 import edu.stanford.smi.protegex.owl.model.RDFSLiteral;
|
|
47
|
|
48 public class ImportGND {
|
|
49
|
|
50 private MetaDataHandler mh;
|
|
51 private TripleStoreHandler th;
|
|
52 private Logger logger = Logger.getRootLogger();
|
|
53 // private Connection con;
|
|
54 private String offset;
|
|
55 private String limit;
|
|
56 private FileWriter dbpediaMissing;
|
|
57
|
|
58 private String mpiwgPerson = "file://mpiwg_persons_2.rdf";
|
|
59
|
|
60 public ImportGND(MetaDataHandler mh, TripleStoreHandler th, String offset,
|
|
61 String limit) throws SQLException, ClassNotFoundException {
|
|
62 this.mh = mh;
|
|
63 this.th = th;
|
|
64 this.offset = offset;
|
|
65 this.limit = limit;
|
|
66
|
|
67 }
|
|
68
|
|
69 private void createMPIWGFromGNDIdentifiers(String predicate,
|
|
70 HashMap<String, String> mapping, String inCtx, String outCtx)
|
|
71 throws RepositoryException, MalformedQueryException,
|
|
72 QueryEvaluationException, URISyntaxException,
|
|
73 TripleStoreHandlerException, SQLException, IOException {
|
|
74 createMPIWGFromGNDIdentifiers(predicate, mapping,
|
|
75 new HashMap<String, String>(), inCtx, outCtx);
|
|
76 }
|
|
77
|
|
78 // benutze predicate zur identifizierung der gnd eintrage, diese haben keine
|
|
79 // rdfs:type attribute, daher nehme ich alle eintraege die ein bestimmtes
|
|
80 // attribute hhaben
|
|
81 private void createMPIWGFromGNDIdentifiers(String predicate,
|
|
82 HashMap<String, String> mapping,
|
|
83 HashMap<String, String> complexMapping, String inCtx, String outCtx)
|
|
84 throws RepositoryException, MalformedQueryException,
|
|
85 QueryEvaluationException, URISyntaxException,
|
|
86 TripleStoreHandlerException, SQLException, IOException {
|
|
87
|
|
88 dbpediaMissing = new FileWriter("/tmp/missingDBPedia.txt");
|
|
89 int newPersonID = 0;
|
|
90 int namedEntityIdentifierID = 0;
|
|
91 int namedEntityIdentifierCreationID = 0;
|
|
92
|
|
93 // List<String> classes = mh.getEquivalentClasses(clsName); // suche
|
|
94 // alle
|
|
95 // aequivalenten
|
|
96 // Klassen
|
|
97 // classes.add(clsName); // add the classname it self;
|
|
98
|
|
99 OWLIndividual providerMPIWG = th.getProvider(mh,
|
|
100 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
|
|
101
|
|
102 if (providerMPIWG == null) {
|
|
103 providerMPIWG = createMPIWFProvider(outCtx);
|
|
104 }
|
|
105
|
|
106 OWLIndividual providerDbPedia = th.getProvider(mh,
|
|
107 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
|
|
108
|
|
109 if (providerDbPedia == null) {
|
|
110 providerDbPedia = createDbPediaProvider(outCtx);
|
|
111 }
|
|
112
|
|
113 OWLIndividual providerDNB = th.getProvider(mh,
|
|
114 TripleStoreHandler.ONTOLOGY_NS + "provider_DNB");
|
|
115
|
|
116 if (providerDNB == null) {
|
|
117 providerDNB = createDNBProvider(outCtx);
|
|
118 }
|
|
119
|
|
120 RepositoryConnection con = th.getRepository().getConnection();
|
|
121 // find all object
|
|
122 String queryString = "SELECT DISTINCT ?s FROM <" + inCtx
|
|
123 + "> WHERE {?s <" + predicate + ">" + "?o .} OFFSET " + offset
|
|
124 + " LIMIT " + limit;
|
|
125
|
|
126 TupleQueryResult result = th.querySPARQL(queryString);
|
|
127
|
|
128 while (result.hasNext()) {
|
|
129 BindingSet bindingSet = result.next();
|
|
130 Value subjValue = bindingSet.getValue("s");
|
|
131 URI gndPerson = th.getRepository().getValueFactory()
|
|
132 .createURI(subjValue.stringValue());
|
|
133
|
|
134 // check if dbpedia link exists
|
|
135 boolean createNew = false;
|
|
136 URI dbpedia = getDBPediaLink(gndPerson, inCtx);
|
|
137 if (dbpedia != null) {
|
|
138 // gibt es den entsprechenden Eintrag schon, dann hole die
|
|
139 // entprechende person
|
|
140 URI person = getPersonFromDBPedia(dbpedia);
|
|
141
|
|
142 if (person != null) {
|
|
143 OWLIndividual personInd = mh.generateEntity(
|
|
144 TripleStoreHandler.ONTOLOGY_NS + "Person",
|
|
145 person.toString());
|
|
146 addGNDToMPIWGIdentifier(gndPerson, personInd,
|
|
147 namedEntityIdentifierID, outCtx, providerDNB,
|
|
148 namedEntityIdentifierCreationID);
|
|
149 } else {
|
|
150 dbpediaMissing.write(subjValue.stringValue());
|
|
151 createNew = true;
|
|
152 }
|
|
153 } else {
|
|
154 createNew = true;
|
|
155 }
|
|
156
|
|
157 // zur GND gibt es noch keinen personen eintrag related zur DB
|
|
158 if (createNew) {
|
|
159
|
|
160 HashMap<String, Object> newValues = new HashMap<String, Object>();// werte
|
|
161 // die
|
|
162 // neu
|
|
163 // eingetragen
|
|
164 // werden
|
|
165 // muessen
|
|
166 for (String key : mapping.keySet()) {
|
|
167
|
|
168 RepositoryResult<Statement> namesStatements = con
|
|
169 .getStatements(gndPerson, th.getRepository()
|
|
170 .getValueFactory().createURI(key), null,
|
|
171 false);
|
|
172
|
|
173 while (namesStatements.hasNext()) {
|
|
174 Statement stmt = namesStatements.next();
|
|
175 Object newValue;
|
|
176 Value val = stmt.getObject();
|
|
177
|
|
178 // sollte literal sein
|
|
179 if (LiteralImpl.class.isInstance(val)) { // wenn ein
|
|
180 // string
|
|
181 // literal,
|
|
182 // dann
|
|
183 // uebersetze
|
|
184 // in jena
|
|
185 // string
|
|
186 // literal
|
|
187 LiteralImpl li = (LiteralImpl) val;
|
|
188
|
|
189 newValue = mh.getOwlModel()
|
|
190 .createRDFSLiteralOrString(li.getLabel(),
|
|
191 li.getLanguage());
|
|
192 newValues.put(mapping.get(key), newValue);
|
|
193 break;
|
|
194 // nicht mehr weiter suche, nimm also immer den
|
|
195 // ersten literal
|
|
196 }
|
|
197
|
|
198 }
|
|
199 }
|
|
200
|
|
201 for (String key : complexMapping.keySet()) {
|
|
202
|
|
203 String cmd = "select ?o from <" + inCtx + "> where {"
|
|
204 + String.format(key, gndPerson.stringValue()) + "}";
|
|
205 TupleQueryResult results = th.querySPARQL(cmd);
|
|
206
|
|
207 if (results.hasNext()) { // nimm nur das erste
|
|
208 BindingSet firstStatement = results.next();
|
|
209
|
|
210 Object newValue;
|
|
211 Value val = firstStatement.getBinding("o").getValue();
|
|
212
|
|
213 if (LiteralImpl.class.isInstance(val)) { // wenn ein
|
|
214 // string
|
|
215 // literal,
|
|
216 // dann
|
|
217 // uebersetze
|
|
218 // in jena
|
|
219 // string
|
|
220 // literal
|
|
221 LiteralImpl li = (LiteralImpl) val;
|
|
222
|
|
223 newValue = mh.getOwlModel()
|
|
224 .createRDFSLiteralOrString(li.getLabel(),
|
|
225 li.getLanguage());
|
|
226 } else { // anderfalls dern string wert = uri
|
|
227 newValue = val.stringValue();
|
|
228
|
|
229 }
|
|
230
|
|
231 newValues.put(complexMapping.get(key), newValue);
|
|
232 }
|
|
233 }
|
|
234
|
|
235 // first create the new person
|
|
236
|
|
237 Boolean ex = checkExistance(newValues, th, mapping.values(),
|
|
238 outCtx);
|
|
239
|
|
240 if (ex) {
|
|
241 logger.info("nothing to be done!");
|
|
242 continue;
|
|
243 }
|
|
244 newPersonID = getNewId(newPersonID, "Person", new String[] {
|
|
245 outCtx, mpiwgPerson });
|
|
246 logger.info("New ID choosen:" + String.valueOf(newPersonID));
|
|
247 OWLIndividual person = mh.generateEntity(
|
|
248 TripleStoreHandler.ONTOLOGY_NS + "Person",
|
|
249 TripleStoreHandler.ONTOLOGY_NS + "Person:"
|
|
250 + String.valueOf(newPersonID), newValues);
|
|
251 mh.printIndividual(person);
|
|
252
|
|
253 newPersonID += 1;
|
|
254 // Boolean ex = checkExistance(person, th, mapping.values(),
|
|
255 // outCtx);
|
|
256 // if (ex) {
|
|
257 // logger.info("nothing to be done!");
|
|
258 // person.delete();
|
|
259 // continue;
|
|
260 // }
|
|
261 th.write(person, outCtx);
|
|
262
|
|
263 // now we create the MPIWG identifier and connect it to the
|
|
264 // person
|
|
265 HashMap<String, Object> idValues = new HashMap<String, Object>();
|
|
266 idValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
267 + "is_preferred_namedEntityIdentifier", person);
|
|
268 idValues.put(
|
|
269 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
|
|
270 "Person:" + String.valueOf(newPersonID)); // TODO PID
|
|
271 // GENERATOR
|
|
272
|
|
273 namedEntityIdentifierID = getNewId(namedEntityIdentifierID,
|
|
274 "NamedEntityIdentifier", new String[] { outCtx,
|
|
275 mpiwgPerson });
|
|
276 OWLIndividual mpiwgIdentifier = mh.generateEntity(
|
|
277 TripleStoreHandler.ONTOLOGY_NS
|
|
278 + "NamedEntityIdentifier",
|
|
279 TripleStoreHandler.ONTOLOGY_NS
|
|
280 + "NamedEntityIdentifier:"
|
|
281 + String.valueOf(namedEntityIdentifierID),
|
|
282 idValues);
|
|
283
|
|
284 namedEntityIdentifierID += 1;
|
|
285 th.write(mpiwgIdentifier, outCtx);
|
|
286
|
|
287 // now create the creation object and connect it to the MPIWG
|
|
288 // identifier and the provider MPIWG
|
|
289 HashMap<String, Object> creationValues = new HashMap<String, Object>();
|
|
290 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
291 + "carriedOutByNamedEntityProvider", providerMPIWG);
|
|
292 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
293 + "created_NamedEntityIdentifier", mpiwgIdentifier);
|
|
294
|
|
295 namedEntityIdentifierCreationID = getNewId(
|
|
296 namedEntityIdentifierCreationID,
|
|
297 "NamedEntityIdentifierCreation", new String[] { outCtx,
|
|
298 mpiwgPerson });
|
|
299
|
|
300 OWLIndividual creation = mh
|
|
301 .generateEntity(
|
|
302 TripleStoreHandler.ONTOLOGY_NS
|
|
303 + "NamedEntityIdentifierCreation",
|
|
304 TripleStoreHandler.ONTOLOGY_NS
|
|
305 + "NamedEntityIdentifierCreation:"
|
|
306 + String.valueOf(namedEntityIdentifierCreationID),
|
|
307 creationValues);
|
|
308 namedEntityIdentifierCreationID += 1;
|
|
309
|
|
310 th.write(creation, outCtx);
|
|
311
|
|
312 addGNDToMPIWGIdentifier(gndPerson, person,
|
|
313 namedEntityIdentifierID, outCtx, providerDNB,
|
|
314 namedEntityIdentifierCreationID);
|
|
315 }
|
|
316 }
|
|
317
|
|
318 }
|
|
319
|
|
320 private void addGNDToMPIWGIdentifier(URI gndPerson, OWLIndividual person,
|
|
321 int namedEntityIdentifierID, String outCtx, Object gndProvider,
|
|
322 int namedEntityIdentifierCreationID) throws SQLException,
|
|
323 RepositoryException, TripleStoreHandlerException {
|
|
324 // now create the gnd identifier and connect it to the
|
|
325 // person
|
|
326
|
|
327 HashMap<String, Object> db_idValues = new HashMap<String, Object>();
|
|
328 db_idValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
329 + "identifies_NamedEntity", person);
|
|
330
|
|
331 // identifier is the url at dng
|
|
332 db_idValues.put(
|
|
333 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
|
|
334 gndPerson.stringValue());
|
|
335
|
|
336 namedEntityIdentifierID = getNewId(namedEntityIdentifierID,
|
|
337 "NamedEntityIdentifier", new String[] { outCtx, mpiwgPerson });
|
|
338
|
|
339 OWLIndividual dbIdentifier = mh.generateEntity(
|
|
340 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier",
|
|
341 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:"
|
|
342 + String.valueOf(namedEntityIdentifierID), db_idValues);
|
|
343
|
|
344 namedEntityIdentifierID += 1;
|
|
345
|
|
346 th.write(dbIdentifier, outCtx);
|
|
347
|
|
348 // now create the creation object and connect it to the gnd
|
|
349 // identifier and the provider gnd
|
|
350 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
351 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
352 + "carriedOutByNamedEntityProvider", gndProvider);
|
|
353 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
|
|
354 + "created_NamedEntityIdentifier", dbIdentifier);
|
|
355
|
|
356 namedEntityIdentifierCreationID = getNewId(
|
|
357 namedEntityIdentifierCreationID,
|
|
358 "NamedEntityIdentifierCreation", new String[] { outCtx,
|
|
359 mpiwgPerson });
|
|
360
|
|
361 OWLIndividual dbcreation = mh.generateEntity(
|
|
362 TripleStoreHandler.ONTOLOGY_NS
|
|
363 + "NamedEntityIdentifierCreation",
|
|
364 TripleStoreHandler.ONTOLOGY_NS
|
|
365 + "NamedEntityIdentifierCreation:"
|
|
366 + String.valueOf(namedEntityIdentifierCreationID),
|
|
367 db_creationValues);
|
|
368 namedEntityIdentifierCreationID += 1;
|
|
369
|
|
370 th.write(dbcreation, outCtx);
|
|
371
|
|
372 // add the bbpedia identifier to the triple store
|
|
373 th.write(gndPerson.stringValue(),
|
|
374 "http://erlangen-crm.org/110404/P1_is_identified_by",
|
|
375 dbIdentifier.getURI(), outCtx);
|
|
376 ;
|
|
377
|
|
378 }
|
|
379
|
|
380 private URI getPersonFromDBPedia(URI dbpedia) {
|
|
381 String query = "select distinct ?x ?y ?person "
|
|
382 + "from <file://mpiwg_persons.rdf> "
|
|
383 + "where { ?y <http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities#identifies_NamedEntity> ?person."
|
|
384 + "<" + dbpedia.stringValue()
|
|
385 + "> <http://erlangen-crm.org/110404/P1_is_identified_by> ?y.}";
|
|
386
|
|
387 try {
|
|
388 TupleQueryResult results = th.querySPARQL(query);
|
|
389 while (results.hasNext()) {
|
|
390 BindingSet stm = results.next();
|
|
391 Binding person = stm.getBinding("person");
|
|
392 return (URI) person.getValue();
|
|
393 }
|
|
394 } catch (MalformedQueryException e) {
|
|
395 // TODO Auto-generated catch block
|
|
396 e.printStackTrace();
|
|
397 } catch (QueryEvaluationException e) {
|
|
398 // TODO Auto-generated catch block
|
|
399 e.printStackTrace();
|
|
400 } catch (TripleStoreHandlerException e) {
|
|
401 // TODO Auto-generated catch block
|
|
402 e.printStackTrace();
|
|
403 }
|
|
404 return null;
|
|
405 }
|
|
406
|
|
407 private URI getDBPediaLink(URI subj, String inCtx) {
|
|
408 try {
|
|
409 RepositoryResult<Statement> statements = th.getStatements(subj,
|
|
410 th.createUri("http://www.w3.org/2002/07/owl#sameAs"), null,
|
|
411 inCtx);
|
|
412 while (statements.hasNext()) {
|
|
413 Statement smt = statements.next();
|
|
414 URI obj = (URI) smt.getObject();
|
|
415 if (obj.getNamespace().equals("http://dbpedia.org/resource/")) {
|
|
416 return obj;
|
|
417 }
|
|
418 }
|
|
419 return null;
|
|
420 } catch (RepositoryException e) {
|
|
421 // TODO Auto-generated catch block
|
|
422 e.printStackTrace();
|
|
423 return null;
|
|
424 }
|
|
425 }
|
|
426
|
|
427 private Boolean checkExistance(HashMap<String, Object> newValues,
|
|
428 TripleStoreHandler th2, Collection<String> props, String outCtx)
|
|
429 throws RepositoryException {
|
|
430
|
|
431 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
|
|
432 for (String propString : props) {
|
|
433 // RDFProperty rdfProp = model.getRDFProperty(propString);
|
|
434 String str = "";
|
|
435 String lang = "";
|
|
436 Object valObj = newValues.get(propString);
|
|
437 if (RDFSLiteral.class.isInstance(valObj)) {
|
|
438 RDFSLiteral val = (RDFSLiteral) valObj;
|
|
439 if (val == null)
|
|
440 continue;
|
|
441 lang = val.getLanguage();
|
|
442 str = val.getString();
|
|
443 str = str.replace("\"", "\\\"");
|
|
444 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
|
|
445 + ".";
|
|
446 } else {
|
|
447 str = (String) valObj;
|
|
448 str = str.replace("\"", "\\\"");
|
|
449 queryString += "?x <" + propString + "> \"" + str + "\""
|
|
450 + ".";
|
|
451 }
|
|
452
|
|
453
|
|
454 }
|
|
455
|
|
456 queryString += " }";
|
|
457
|
|
458 TupleQueryResult result;
|
|
459 try {
|
|
460 result = th.querySPARQL(queryString);
|
|
461 } catch (MalformedQueryException e) {
|
|
462 logger.error("Query String cannot be handled:" + queryString);
|
|
463 return false;
|
|
464 } catch (QueryEvaluationException e) {
|
|
465 logger.error("Query String cannot be handled:" + queryString);
|
|
466 return false;
|
|
467 } catch (TripleStoreHandlerException e) {
|
|
468 e.printStackTrace();
|
|
469 logger.error("Query String cannot be handled:" + queryString);
|
|
470 return false;
|
|
471 }
|
|
472 try {
|
|
473 if (result.hasNext())
|
|
474 return true;
|
|
475 else
|
|
476 return false;
|
|
477 } catch (QueryEvaluationException e) {
|
|
478 logger.error("Query String cannot be handled:" + queryString);
|
|
479 return false;
|
|
480 }
|
|
481
|
|
482 }
|
|
483
|
|
484 private int getNewId(int startnumber, String identifier, String[] ctx)
|
|
485 throws SQLException {
|
|
486
|
|
487 java.sql.Statement smt = th.sqlCon.createStatement();
|
|
488 Boolean exists = true;
|
|
489
|
|
490 while (exists) {
|
|
491 String fromString = "";
|
|
492 for (int i = 0; i < ctx.length; i++) {
|
|
493 fromString += String.format(" from <%s> ", ctx[i]);
|
|
494 }
|
|
495 String cmdString = String.format(
|
|
496 "sparql select count(*) %s where {<%s> ?x ?y}",
|
|
497 fromString,
|
|
498 TripleStoreHandler.ONTOLOGY_NS + identifier + ":"
|
|
499 + String.valueOf(startnumber));
|
|
500 smt.execute(cmdString);
|
|
501 ResultSet rs = smt.getResultSet();
|
|
502 rs.next();
|
|
503 int count = rs.getInt(1);
|
|
504 if (count > 0) {
|
|
505 startnumber += 1;
|
|
506 } else {
|
|
507 exists = false;
|
|
508 }
|
|
509 }
|
|
510
|
|
511 return startnumber;
|
|
512 }
|
|
513
|
|
514 private Boolean checkExistance(OWLIndividual person,
|
|
515 TripleStoreHandler th2, Collection<String> props, String outCtx)
|
|
516 throws RepositoryException {
|
|
517 JenaOWLModel model = mh.getOwlModel();
|
|
518 // Map<String, String> vals = new HashMap<String, String>();
|
|
519
|
|
520 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
|
|
521 for (String propString : props) {
|
|
522 RDFProperty rdfProp = model.getRDFProperty(propString);
|
|
523 RDFSLiteral val = (RDFSLiteral) person.getPropertyValue(rdfProp);
|
|
524 if (val == null)
|
|
525 continue;
|
|
526 String lang = val.getLanguage();
|
|
527 String str = val.getString();
|
|
528 str = str.replace("\"", "\\\"");
|
|
529 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
|
|
530 + ".";
|
|
531 }
|
|
532
|
|
533 queryString += " }";
|
|
534 TupleQueryResult result;
|
|
535 try {
|
|
536 result = th.querySPARQL(queryString);
|
|
537 } catch (MalformedQueryException e) {
|
|
538 logger.error("Query String cannot be handled:" + queryString);
|
|
539 return false;
|
|
540 } catch (QueryEvaluationException e) {
|
|
541 logger.error("Query String cannot be handled:" + queryString);
|
|
542 return false;
|
|
543 } catch (TripleStoreHandlerException e) {
|
|
544 logger.error("Query String cannot be handled:" + queryString);
|
|
545 return false;
|
|
546 }
|
|
547 try {
|
|
548 if (result.hasNext())
|
|
549 return true;
|
|
550 else
|
|
551 return false;
|
|
552 } catch (QueryEvaluationException e) {
|
|
553 logger.error("Query String cannot be handled:" + queryString);
|
|
554 return false;
|
|
555 }
|
|
556
|
|
557 }
|
|
558
|
|
559 private OWLIndividual createDbPediaProvider(String ctx)
|
|
560 throws RepositoryException, TripleStoreHandlerException {
|
|
561 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
562
|
|
563 OWLIndividual dbcreation = mh.generateEntity(
|
|
564 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
|
|
565 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
|
|
566
|
|
567 th.write(dbcreation, ctx);
|
|
568
|
|
569 return dbcreation;
|
|
570
|
|
571 }
|
|
572
|
|
573 private OWLIndividual createDNBProvider(String ctx)
|
|
574 throws RepositoryException, TripleStoreHandlerException {
|
|
575 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
576
|
|
577 OWLIndividual dbcreation = mh.generateEntity(
|
|
578 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
|
|
579 TripleStoreHandler.ONTOLOGY_NS + "provider_DNB");
|
|
580
|
|
581 th.write(dbcreation, ctx);
|
|
582
|
|
583 return dbcreation;
|
|
584
|
|
585 }
|
|
586
|
|
587 private OWLIndividual createMPIWFProvider(String ctx)
|
|
588 throws RepositoryException, TripleStoreHandlerException {
|
|
589 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
|
|
590
|
|
591 OWLIndividual dbcreation = mh.generateEntity(
|
|
592 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
|
|
593 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
|
|
594
|
|
595 th.write(dbcreation, ctx);
|
|
596
|
|
597 return dbcreation;
|
|
598
|
|
599 }
|
|
600
|
|
601 public static void main(String args[]) throws Exception {
|
|
602 if (args.length < 4) {
|
|
603 System.out.println("usage: import user pw offset limit ");
|
|
604 System.exit(1);
|
|
605 }
|
|
606
|
|
607 Logger.getRootLogger().setLevel(Level.INFO);
|
|
608 BasicConfigurator.configure();
|
|
609
|
|
610 MetaDataHandler mh = new MetaDataHandler();
|
|
611
|
|
612 // TripleStoreHandler th = new TripleStoreHandler(
|
|
613 // "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111",args[0],
|
|
614 // args[1]);
|
|
615
|
|
616 TripleStoreHandler th = new TripleStoreHandler(
|
|
617 "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111", args[0], args[1]);
|
|
618 System.out.println(mh);
|
|
619 System.out.println(th);
|
|
620
|
|
621 ImportGND imp = new ImportGND(mh, th, args[2], args[3]);
|
|
622
|
|
623 HashMap<String, String> mapping = new HashMap<String, String>();
|
|
624 HashMap<String, String> complexMapping = new HashMap<String, String>();
|
|
625
|
|
626 // SELECT DISTINCT *
|
|
627 // FROM <file://mpiwg_persons.rdf>
|
|
628 // FROM <file:///GND.rdf>
|
|
629 // WHERE { ?p <http://d-nb.info/gnd/foreName> ?o.
|
|
630 // <http://d-nb.info/gnd/100004776>
|
|
631 // <http://d-nb.info/gnd/preferredNameForThePerson> ?p }
|
|
632
|
|
633 // SELECT DISTINCT *
|
|
634 // FROM <file://mpiwg_persons.rdf>
|
|
635 // FROM <file:///GND.rdf>
|
|
636 // WHERE { ?o <http://d-nb.info/gnd/surname> ?o2.
|
|
637 // <http://d-nb.info/gnd/100004776>
|
|
638 // <http://d-nb.info/gnd/preferredNameForThePerson> ?o }
|
|
639
|
|
640 complexMapping.put("?p <http://d-nb.info/gnd/surname> ?o."
|
|
641 + "<%s> <http://d-nb.info/gnd/preferredNameForThePerson> ?p ",
|
|
642 "http://xmlns.com/foaf/0.1/lastName");
|
|
643 complexMapping.put("?p <http://d-nb.info/gnd/foreName> ?o."
|
|
644 + "<%s> <http://d-nb.info/gnd/preferredNameForThePerson> ?p ",
|
|
645 "http://xmlns.com/foaf/0.1/firstName");
|
|
646 mapping.put("http://d-nb.info/gnd/preferredNameForThePerson",
|
|
647 "http://xmlns.com/foaf/0.1/name");
|
|
648
|
|
649 imp.createMPIWGFromGNDIdentifiers(
|
|
650 "http://RDVocab.info/ElementsGr2/identifierForThePerson",
|
|
651 mapping, complexMapping, "file:///GND.rdf",
|
|
652 "file://mpiwg_persons_dnb.rdf");
|
|
653
|
|
654 // mh.getOwlModel().save(new java.net.URI("file:///tmp/prot.owl"));
|
|
655 }
|
|
656
|
|
657 }
|