Mercurial > hg > NamedIdentityManager
comparison src/de/mpiwg/itgroup/nimanager/importer/ImportGND.java @ 2:e3ecb88314a5
minor bugs
ontologies added
author | dwinter |
---|---|
date | Fri, 02 Dec 2011 08:37:03 +0100 |
parents | |
children | f986e74583eb |
comparison
equal
deleted
inserted
replaced
1:b8333fab0d95 | 2:e3ecb88314a5 |
---|---|
1 package de.mpiwg.itgroup.nimanager.importer; | |
2 | |
3 import java.awt.dnd.DnDConstants; | |
4 import java.io.FileWriter; | |
5 import java.io.IOException; | |
6 import java.io.PrintStream; | |
7 import java.net.URISyntaxException; | |
8 import java.net.URL; | |
9 import java.sql.Connection; | |
10 import java.sql.DriverManager; | |
11 import java.sql.ResultSet; | |
12 import java.sql.SQLException; | |
13 import java.util.Collection; | |
14 import java.util.HashMap; | |
15 import java.util.List; | |
16 import java.util.Map; | |
17 | |
18 import org.apache.log4j.BasicConfigurator; | |
19 import org.apache.log4j.Level; | |
20 import org.apache.log4j.Logger; | |
21 import org.openrdf.model.Resource; | |
22 import org.openrdf.model.Statement; | |
23 import org.openrdf.model.URI; | |
24 import org.openrdf.model.Value; | |
25 import org.openrdf.model.impl.LiteralImpl; | |
26 import org.openrdf.query.Binding; | |
27 import org.openrdf.query.BindingSet; | |
28 import org.openrdf.query.MalformedQueryException; | |
29 import org.openrdf.query.Query; | |
30 import org.openrdf.query.QueryEvaluationException; | |
31 import org.openrdf.query.QueryLanguage; | |
32 import org.openrdf.query.TupleQuery; | |
33 import org.openrdf.query.TupleQueryResult; | |
34 import org.openrdf.repository.RepositoryConnection; | |
35 import org.openrdf.repository.RepositoryException; | |
36 import org.openrdf.repository.RepositoryResult; | |
37 | |
38 import com.hp.hpl.jena.graph.impl.LiteralLabel; | |
39 | |
40 import de.mpiwg.itgroup.nimanager.exceptions.TripleStoreHandlerException; | |
41 import de.mpiwg.itgroup.nimanager.owl.MetaDataHandler; | |
42 import de.mpiwg.itgroup.nimanager.owl.TripleStoreHandler; | |
43 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel; | |
44 import edu.stanford.smi.protegex.owl.model.OWLIndividual; | |
45 import edu.stanford.smi.protegex.owl.model.RDFProperty; | |
46 import edu.stanford.smi.protegex.owl.model.RDFSLiteral; | |
47 | |
48 public class ImportGND { | |
49 | |
50 private MetaDataHandler mh; | |
51 private TripleStoreHandler th; | |
52 private Logger logger = Logger.getRootLogger(); | |
53 // private Connection con; | |
54 private String offset; | |
55 private String limit; | |
56 private FileWriter dbpediaMissing; | |
57 | |
58 private String mpiwgPerson = "file://mpiwg_persons_2.rdf"; | |
59 | |
60 public ImportGND(MetaDataHandler mh, TripleStoreHandler th, String offset, | |
61 String limit) throws SQLException, ClassNotFoundException { | |
62 this.mh = mh; | |
63 this.th = th; | |
64 this.offset = offset; | |
65 this.limit = limit; | |
66 | |
67 } | |
68 | |
69 private void createMPIWGFromGNDIdentifiers(String predicate, | |
70 HashMap<String, String> mapping, String inCtx, String outCtx) | |
71 throws RepositoryException, MalformedQueryException, | |
72 QueryEvaluationException, URISyntaxException, | |
73 TripleStoreHandlerException, SQLException, IOException { | |
74 createMPIWGFromGNDIdentifiers(predicate, mapping, | |
75 new HashMap<String, String>(), inCtx, outCtx); | |
76 } | |
77 | |
78 // benutze predicate zur identifizierung der gnd eintrage, diese haben keine | |
79 // rdfs:type attribute, daher nehme ich alle eintraege die ein bestimmtes | |
80 // attribute hhaben | |
81 private void createMPIWGFromGNDIdentifiers(String predicate, | |
82 HashMap<String, String> mapping, | |
83 HashMap<String, String> complexMapping, String inCtx, String outCtx) | |
84 throws RepositoryException, MalformedQueryException, | |
85 QueryEvaluationException, URISyntaxException, | |
86 TripleStoreHandlerException, SQLException, IOException { | |
87 | |
88 dbpediaMissing = new FileWriter("/tmp/missingDBPedia.txt"); | |
89 int newPersonID = 0; | |
90 int namedEntityIdentifierID = 0; | |
91 int namedEntityIdentifierCreationID = 0; | |
92 | |
93 // List<String> classes = mh.getEquivalentClasses(clsName); // suche | |
94 // alle | |
95 // aequivalenten | |
96 // Klassen | |
97 // classes.add(clsName); // add the classname it self; | |
98 | |
99 OWLIndividual providerMPIWG = th.getProvider(mh, | |
100 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG"); | |
101 | |
102 if (providerMPIWG == null) { | |
103 providerMPIWG = createMPIWFProvider(outCtx); | |
104 } | |
105 | |
106 OWLIndividual providerDbPedia = th.getProvider(mh, | |
107 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia"); | |
108 | |
109 if (providerDbPedia == null) { | |
110 providerDbPedia = createDbPediaProvider(outCtx); | |
111 } | |
112 | |
113 OWLIndividual providerDNB = th.getProvider(mh, | |
114 TripleStoreHandler.ONTOLOGY_NS + "provider_DNB"); | |
115 | |
116 if (providerDNB == null) { | |
117 providerDNB = createDNBProvider(outCtx); | |
118 } | |
119 | |
120 RepositoryConnection con = th.getRepository().getConnection(); | |
121 // find all object | |
122 String queryString = "SELECT DISTINCT ?s FROM <" + inCtx | |
123 + "> WHERE {?s <" + predicate + ">" + "?o .} OFFSET " + offset | |
124 + " LIMIT " + limit; | |
125 | |
126 TupleQueryResult result = th.querySPARQL(queryString); | |
127 | |
128 while (result.hasNext()) { | |
129 BindingSet bindingSet = result.next(); | |
130 Value subjValue = bindingSet.getValue("s"); | |
131 URI gndPerson = th.getRepository().getValueFactory() | |
132 .createURI(subjValue.stringValue()); | |
133 | |
134 // check if dbpedia link exists | |
135 boolean createNew = false; | |
136 URI dbpedia = getDBPediaLink(gndPerson, inCtx); | |
137 if (dbpedia != null) { | |
138 // gibt es den entsprechenden Eintrag schon, dann hole die | |
139 // entprechende person | |
140 URI person = getPersonFromDBPedia(dbpedia); | |
141 | |
142 if (person != null) { | |
143 OWLIndividual personInd = mh.generateEntity( | |
144 TripleStoreHandler.ONTOLOGY_NS + "Person", | |
145 person.toString()); | |
146 addGNDToMPIWGIdentifier(gndPerson, personInd, | |
147 namedEntityIdentifierID, outCtx, providerDNB, | |
148 namedEntityIdentifierCreationID); | |
149 } else { | |
150 dbpediaMissing.write(subjValue.stringValue()); | |
151 createNew = true; | |
152 } | |
153 } else { | |
154 createNew = true; | |
155 } | |
156 | |
157 // zur GND gibt es noch keinen personen eintrag related zur DB | |
158 if (createNew) { | |
159 | |
160 HashMap<String, Object> newValues = new HashMap<String, Object>();// werte | |
161 // die | |
162 // neu | |
163 // eingetragen | |
164 // werden | |
165 // muessen | |
166 for (String key : mapping.keySet()) { | |
167 | |
168 RepositoryResult<Statement> namesStatements = con | |
169 .getStatements(gndPerson, th.getRepository() | |
170 .getValueFactory().createURI(key), null, | |
171 false); | |
172 | |
173 while (namesStatements.hasNext()) { | |
174 Statement stmt = namesStatements.next(); | |
175 Object newValue; | |
176 Value val = stmt.getObject(); | |
177 | |
178 // sollte literal sein | |
179 if (LiteralImpl.class.isInstance(val)) { // wenn ein | |
180 // string | |
181 // literal, | |
182 // dann | |
183 // uebersetze | |
184 // in jena | |
185 // string | |
186 // literal | |
187 LiteralImpl li = (LiteralImpl) val; | |
188 | |
189 newValue = mh.getOwlModel() | |
190 .createRDFSLiteralOrString(li.getLabel(), | |
191 li.getLanguage()); | |
192 newValues.put(mapping.get(key), newValue); | |
193 break; | |
194 // nicht mehr weiter suche, nimm also immer den | |
195 // ersten literal | |
196 } | |
197 | |
198 } | |
199 } | |
200 | |
201 for (String key : complexMapping.keySet()) { | |
202 | |
203 String cmd = "select ?o from <" + inCtx + "> where {" | |
204 + String.format(key, gndPerson.stringValue()) + "}"; | |
205 TupleQueryResult results = th.querySPARQL(cmd); | |
206 | |
207 if (results.hasNext()) { // nimm nur das erste | |
208 BindingSet firstStatement = results.next(); | |
209 | |
210 Object newValue; | |
211 Value val = firstStatement.getBinding("o").getValue(); | |
212 | |
213 if (LiteralImpl.class.isInstance(val)) { // wenn ein | |
214 // string | |
215 // literal, | |
216 // dann | |
217 // uebersetze | |
218 // in jena | |
219 // string | |
220 // literal | |
221 LiteralImpl li = (LiteralImpl) val; | |
222 | |
223 newValue = mh.getOwlModel() | |
224 .createRDFSLiteralOrString(li.getLabel(), | |
225 li.getLanguage()); | |
226 } else { // anderfalls dern string wert = uri | |
227 newValue = val.stringValue(); | |
228 | |
229 } | |
230 | |
231 newValues.put(complexMapping.get(key), newValue); | |
232 } | |
233 } | |
234 | |
235 // first create the new person | |
236 | |
237 Boolean ex = checkExistance(newValues, th, mapping.values(), | |
238 outCtx); | |
239 | |
240 if (ex) { | |
241 logger.info("nothing to be done!"); | |
242 continue; | |
243 } | |
244 newPersonID = getNewId(newPersonID, "Person", new String[] { | |
245 outCtx, mpiwgPerson }); | |
246 logger.info("New ID choosen:" + String.valueOf(newPersonID)); | |
247 OWLIndividual person = mh.generateEntity( | |
248 TripleStoreHandler.ONTOLOGY_NS + "Person", | |
249 TripleStoreHandler.ONTOLOGY_NS + "Person:" | |
250 + String.valueOf(newPersonID), newValues); | |
251 mh.printIndividual(person); | |
252 | |
253 newPersonID += 1; | |
254 // Boolean ex = checkExistance(person, th, mapping.values(), | |
255 // outCtx); | |
256 // if (ex) { | |
257 // logger.info("nothing to be done!"); | |
258 // person.delete(); | |
259 // continue; | |
260 // } | |
261 th.write(person, outCtx); | |
262 | |
263 // now we create the MPIWG identifier and connect it to the | |
264 // person | |
265 HashMap<String, Object> idValues = new HashMap<String, Object>(); | |
266 idValues.put(TripleStoreHandler.ONTOLOGY_NS | |
267 + "is_preferred_namedEntityIdentifier", person); | |
268 idValues.put( | |
269 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String", | |
270 "Person:" + String.valueOf(newPersonID)); // TODO PID | |
271 // GENERATOR | |
272 | |
273 namedEntityIdentifierID = getNewId(namedEntityIdentifierID, | |
274 "NamedEntityIdentifier", new String[] { outCtx, | |
275 mpiwgPerson }); | |
276 OWLIndividual mpiwgIdentifier = mh.generateEntity( | |
277 TripleStoreHandler.ONTOLOGY_NS | |
278 + "NamedEntityIdentifier", | |
279 TripleStoreHandler.ONTOLOGY_NS | |
280 + "NamedEntityIdentifier:" | |
281 + String.valueOf(namedEntityIdentifierID), | |
282 idValues); | |
283 | |
284 namedEntityIdentifierID += 1; | |
285 th.write(mpiwgIdentifier, outCtx); | |
286 | |
287 // now create the creation object and connect it to the MPIWG | |
288 // identifier and the provider MPIWG | |
289 HashMap<String, Object> creationValues = new HashMap<String, Object>(); | |
290 creationValues.put(TripleStoreHandler.ONTOLOGY_NS | |
291 + "carriedOutByNamedEntityProvider", providerMPIWG); | |
292 creationValues.put(TripleStoreHandler.ONTOLOGY_NS | |
293 + "created_NamedEntityIdentifier", mpiwgIdentifier); | |
294 | |
295 namedEntityIdentifierCreationID = getNewId( | |
296 namedEntityIdentifierCreationID, | |
297 "NamedEntityIdentifierCreation", new String[] { outCtx, | |
298 mpiwgPerson }); | |
299 | |
300 OWLIndividual creation = mh | |
301 .generateEntity( | |
302 TripleStoreHandler.ONTOLOGY_NS | |
303 + "NamedEntityIdentifierCreation", | |
304 TripleStoreHandler.ONTOLOGY_NS | |
305 + "NamedEntityIdentifierCreation:" | |
306 + String.valueOf(namedEntityIdentifierCreationID), | |
307 creationValues); | |
308 namedEntityIdentifierCreationID += 1; | |
309 | |
310 th.write(creation, outCtx); | |
311 | |
312 addGNDToMPIWGIdentifier(gndPerson, person, | |
313 namedEntityIdentifierID, outCtx, providerDNB, | |
314 namedEntityIdentifierCreationID); | |
315 } | |
316 } | |
317 | |
318 } | |
319 | |
320 private void addGNDToMPIWGIdentifier(URI gndPerson, OWLIndividual person, | |
321 int namedEntityIdentifierID, String outCtx, Object gndProvider, | |
322 int namedEntityIdentifierCreationID) throws SQLException, | |
323 RepositoryException, TripleStoreHandlerException { | |
324 // now create the gnd identifier and connect it to the | |
325 // person | |
326 | |
327 HashMap<String, Object> db_idValues = new HashMap<String, Object>(); | |
328 db_idValues.put(TripleStoreHandler.ONTOLOGY_NS | |
329 + "identifies_NamedEntity", person); | |
330 | |
331 // identifier is the url at dng | |
332 db_idValues.put( | |
333 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String", | |
334 gndPerson.stringValue()); | |
335 | |
336 namedEntityIdentifierID = getNewId(namedEntityIdentifierID, | |
337 "NamedEntityIdentifier", new String[] { outCtx, mpiwgPerson }); | |
338 | |
339 OWLIndividual dbIdentifier = mh.generateEntity( | |
340 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier", | |
341 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:" | |
342 + String.valueOf(namedEntityIdentifierID), db_idValues); | |
343 | |
344 namedEntityIdentifierID += 1; | |
345 | |
346 th.write(dbIdentifier, outCtx); | |
347 | |
348 // now create the creation object and connect it to the gnd | |
349 // identifier and the provider gnd | |
350 HashMap<String, Object> db_creationValues = new HashMap<String, Object>(); | |
351 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS | |
352 + "carriedOutByNamedEntityProvider", gndProvider); | |
353 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS | |
354 + "created_NamedEntityIdentifier", dbIdentifier); | |
355 | |
356 namedEntityIdentifierCreationID = getNewId( | |
357 namedEntityIdentifierCreationID, | |
358 "NamedEntityIdentifierCreation", new String[] { outCtx, | |
359 mpiwgPerson }); | |
360 | |
361 OWLIndividual dbcreation = mh.generateEntity( | |
362 TripleStoreHandler.ONTOLOGY_NS | |
363 + "NamedEntityIdentifierCreation", | |
364 TripleStoreHandler.ONTOLOGY_NS | |
365 + "NamedEntityIdentifierCreation:" | |
366 + String.valueOf(namedEntityIdentifierCreationID), | |
367 db_creationValues); | |
368 namedEntityIdentifierCreationID += 1; | |
369 | |
370 th.write(dbcreation, outCtx); | |
371 | |
372 // add the bbpedia identifier to the triple store | |
373 th.write(gndPerson.stringValue(), | |
374 "http://erlangen-crm.org/110404/P1_is_identified_by", | |
375 dbIdentifier.getURI(), outCtx); | |
376 ; | |
377 | |
378 } | |
379 | |
380 private URI getPersonFromDBPedia(URI dbpedia) { | |
381 String query = "select distinct ?x ?y ?person " | |
382 + "from <file://mpiwg_persons.rdf> " | |
383 + "where { ?y <http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities#identifies_NamedEntity> ?person." | |
384 + "<" + dbpedia.stringValue() | |
385 + "> <http://erlangen-crm.org/110404/P1_is_identified_by> ?y.}"; | |
386 | |
387 try { | |
388 TupleQueryResult results = th.querySPARQL(query); | |
389 while (results.hasNext()) { | |
390 BindingSet stm = results.next(); | |
391 Binding person = stm.getBinding("person"); | |
392 return (URI) person.getValue(); | |
393 } | |
394 } catch (MalformedQueryException e) { | |
395 // TODO Auto-generated catch block | |
396 e.printStackTrace(); | |
397 } catch (QueryEvaluationException e) { | |
398 // TODO Auto-generated catch block | |
399 e.printStackTrace(); | |
400 } catch (TripleStoreHandlerException e) { | |
401 // TODO Auto-generated catch block | |
402 e.printStackTrace(); | |
403 } | |
404 return null; | |
405 } | |
406 | |
407 private URI getDBPediaLink(URI subj, String inCtx) { | |
408 try { | |
409 RepositoryResult<Statement> statements = th.getStatements(subj, | |
410 th.createUri("http://www.w3.org/2002/07/owl#sameAs"), null, | |
411 inCtx); | |
412 while (statements.hasNext()) { | |
413 Statement smt = statements.next(); | |
414 URI obj = (URI) smt.getObject(); | |
415 if (obj.getNamespace().equals("http://dbpedia.org/resource/")) { | |
416 return obj; | |
417 } | |
418 } | |
419 return null; | |
420 } catch (RepositoryException e) { | |
421 // TODO Auto-generated catch block | |
422 e.printStackTrace(); | |
423 return null; | |
424 } | |
425 } | |
426 | |
427 private Boolean checkExistance(HashMap<String, Object> newValues, | |
428 TripleStoreHandler th2, Collection<String> props, String outCtx) | |
429 throws RepositoryException { | |
430 | |
431 String queryString = "select ?x FROM <" + outCtx + "> " + "where {"; | |
432 for (String propString : props) { | |
433 // RDFProperty rdfProp = model.getRDFProperty(propString); | |
434 String str = ""; | |
435 String lang = ""; | |
436 Object valObj = newValues.get(propString); | |
437 if (RDFSLiteral.class.isInstance(valObj)) { | |
438 RDFSLiteral val = (RDFSLiteral) valObj; | |
439 if (val == null) | |
440 continue; | |
441 lang = val.getLanguage(); | |
442 str = val.getString(); | |
443 str = str.replace("\"", "\\\""); | |
444 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang | |
445 + "."; | |
446 } else { | |
447 str = (String) valObj; | |
448 str = str.replace("\"", "\\\""); | |
449 queryString += "?x <" + propString + "> \"" + str + "\"" | |
450 + "."; | |
451 } | |
452 | |
453 | |
454 } | |
455 | |
456 queryString += " }"; | |
457 | |
458 TupleQueryResult result; | |
459 try { | |
460 result = th.querySPARQL(queryString); | |
461 } catch (MalformedQueryException e) { | |
462 logger.error("Query String cannot be handled:" + queryString); | |
463 return false; | |
464 } catch (QueryEvaluationException e) { | |
465 logger.error("Query String cannot be handled:" + queryString); | |
466 return false; | |
467 } catch (TripleStoreHandlerException e) { | |
468 e.printStackTrace(); | |
469 logger.error("Query String cannot be handled:" + queryString); | |
470 return false; | |
471 } | |
472 try { | |
473 if (result.hasNext()) | |
474 return true; | |
475 else | |
476 return false; | |
477 } catch (QueryEvaluationException e) { | |
478 logger.error("Query String cannot be handled:" + queryString); | |
479 return false; | |
480 } | |
481 | |
482 } | |
483 | |
484 private int getNewId(int startnumber, String identifier, String[] ctx) | |
485 throws SQLException { | |
486 | |
487 java.sql.Statement smt = th.sqlCon.createStatement(); | |
488 Boolean exists = true; | |
489 | |
490 while (exists) { | |
491 String fromString = ""; | |
492 for (int i = 0; i < ctx.length; i++) { | |
493 fromString += String.format(" from <%s> ", ctx[i]); | |
494 } | |
495 String cmdString = String.format( | |
496 "sparql select count(*) %s where {<%s> ?x ?y}", | |
497 fromString, | |
498 TripleStoreHandler.ONTOLOGY_NS + identifier + ":" | |
499 + String.valueOf(startnumber)); | |
500 smt.execute(cmdString); | |
501 ResultSet rs = smt.getResultSet(); | |
502 rs.next(); | |
503 int count = rs.getInt(1); | |
504 if (count > 0) { | |
505 startnumber += 1; | |
506 } else { | |
507 exists = false; | |
508 } | |
509 } | |
510 | |
511 return startnumber; | |
512 } | |
513 | |
514 private Boolean checkExistance(OWLIndividual person, | |
515 TripleStoreHandler th2, Collection<String> props, String outCtx) | |
516 throws RepositoryException { | |
517 JenaOWLModel model = mh.getOwlModel(); | |
518 // Map<String, String> vals = new HashMap<String, String>(); | |
519 | |
520 String queryString = "select ?x FROM <" + outCtx + "> " + "where {"; | |
521 for (String propString : props) { | |
522 RDFProperty rdfProp = model.getRDFProperty(propString); | |
523 RDFSLiteral val = (RDFSLiteral) person.getPropertyValue(rdfProp); | |
524 if (val == null) | |
525 continue; | |
526 String lang = val.getLanguage(); | |
527 String str = val.getString(); | |
528 str = str.replace("\"", "\\\""); | |
529 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang | |
530 + "."; | |
531 } | |
532 | |
533 queryString += " }"; | |
534 TupleQueryResult result; | |
535 try { | |
536 result = th.querySPARQL(queryString); | |
537 } catch (MalformedQueryException e) { | |
538 logger.error("Query String cannot be handled:" + queryString); | |
539 return false; | |
540 } catch (QueryEvaluationException e) { | |
541 logger.error("Query String cannot be handled:" + queryString); | |
542 return false; | |
543 } catch (TripleStoreHandlerException e) { | |
544 logger.error("Query String cannot be handled:" + queryString); | |
545 return false; | |
546 } | |
547 try { | |
548 if (result.hasNext()) | |
549 return true; | |
550 else | |
551 return false; | |
552 } catch (QueryEvaluationException e) { | |
553 logger.error("Query String cannot be handled:" + queryString); | |
554 return false; | |
555 } | |
556 | |
557 } | |
558 | |
559 private OWLIndividual createDbPediaProvider(String ctx) | |
560 throws RepositoryException, TripleStoreHandlerException { | |
561 HashMap<String, Object> db_creationValues = new HashMap<String, Object>(); | |
562 | |
563 OWLIndividual dbcreation = mh.generateEntity( | |
564 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider", | |
565 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia"); | |
566 | |
567 th.write(dbcreation, ctx); | |
568 | |
569 return dbcreation; | |
570 | |
571 } | |
572 | |
573 private OWLIndividual createDNBProvider(String ctx) | |
574 throws RepositoryException, TripleStoreHandlerException { | |
575 HashMap<String, Object> db_creationValues = new HashMap<String, Object>(); | |
576 | |
577 OWLIndividual dbcreation = mh.generateEntity( | |
578 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider", | |
579 TripleStoreHandler.ONTOLOGY_NS + "provider_DNB"); | |
580 | |
581 th.write(dbcreation, ctx); | |
582 | |
583 return dbcreation; | |
584 | |
585 } | |
586 | |
587 private OWLIndividual createMPIWFProvider(String ctx) | |
588 throws RepositoryException, TripleStoreHandlerException { | |
589 HashMap<String, Object> db_creationValues = new HashMap<String, Object>(); | |
590 | |
591 OWLIndividual dbcreation = mh.generateEntity( | |
592 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider", | |
593 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG"); | |
594 | |
595 th.write(dbcreation, ctx); | |
596 | |
597 return dbcreation; | |
598 | |
599 } | |
600 | |
601 public static void main(String args[]) throws Exception { | |
602 if (args.length < 4) { | |
603 System.out.println("usage: import user pw offset limit "); | |
604 System.exit(1); | |
605 } | |
606 | |
607 Logger.getRootLogger().setLevel(Level.INFO); | |
608 BasicConfigurator.configure(); | |
609 | |
610 MetaDataHandler mh = new MetaDataHandler(); | |
611 | |
612 // TripleStoreHandler th = new TripleStoreHandler( | |
613 // "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111",args[0], | |
614 // args[1]); | |
615 | |
616 TripleStoreHandler th = new TripleStoreHandler( | |
617 "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111", args[0], args[1]); | |
618 System.out.println(mh); | |
619 System.out.println(th); | |
620 | |
621 ImportGND imp = new ImportGND(mh, th, args[2], args[3]); | |
622 | |
623 HashMap<String, String> mapping = new HashMap<String, String>(); | |
624 HashMap<String, String> complexMapping = new HashMap<String, String>(); | |
625 | |
626 // SELECT DISTINCT * | |
627 // FROM <file://mpiwg_persons.rdf> | |
628 // FROM <file:///GND.rdf> | |
629 // WHERE { ?p <http://d-nb.info/gnd/foreName> ?o. | |
630 // <http://d-nb.info/gnd/100004776> | |
631 // <http://d-nb.info/gnd/preferredNameForThePerson> ?p } | |
632 | |
633 // SELECT DISTINCT * | |
634 // FROM <file://mpiwg_persons.rdf> | |
635 // FROM <file:///GND.rdf> | |
636 // WHERE { ?o <http://d-nb.info/gnd/surname> ?o2. | |
637 // <http://d-nb.info/gnd/100004776> | |
638 // <http://d-nb.info/gnd/preferredNameForThePerson> ?o } | |
639 | |
640 complexMapping.put("?p <http://d-nb.info/gnd/surname> ?o." | |
641 + "<%s> <http://d-nb.info/gnd/preferredNameForThePerson> ?p ", | |
642 "http://xmlns.com/foaf/0.1/lastName"); | |
643 complexMapping.put("?p <http://d-nb.info/gnd/foreName> ?o." | |
644 + "<%s> <http://d-nb.info/gnd/preferredNameForThePerson> ?p ", | |
645 "http://xmlns.com/foaf/0.1/firstName"); | |
646 mapping.put("http://d-nb.info/gnd/preferredNameForThePerson", | |
647 "http://xmlns.com/foaf/0.1/name"); | |
648 | |
649 imp.createMPIWGFromGNDIdentifiers( | |
650 "http://RDVocab.info/ElementsGr2/identifierForThePerson", | |
651 mapping, complexMapping, "file:///GND.rdf", | |
652 "file://mpiwg_persons_dnb.rdf"); | |
653 | |
654 // mh.getOwlModel().save(new java.net.URI("file:///tmp/prot.owl")); | |
655 } | |
656 | |
657 } |