annotate src/de/mpiwg/itgroup/nimanager/importer/ImportGND.java @ 2:e3ecb88314a5

minor bugs ontologies added
author dwinter
date Fri, 02 Dec 2011 08:37:03 +0100
parents
children f986e74583eb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.nimanager.importer;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
2
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
3 import java.awt.dnd.DnDConstants;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
4 import java.io.FileWriter;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
5 import java.io.IOException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
6 import java.io.PrintStream;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
7 import java.net.URISyntaxException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
8 import java.net.URL;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
9 import java.sql.Connection;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
10 import java.sql.DriverManager;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
11 import java.sql.ResultSet;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
12 import java.sql.SQLException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
13 import java.util.Collection;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
14 import java.util.HashMap;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
15 import java.util.List;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
16 import java.util.Map;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
17
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
18 import org.apache.log4j.BasicConfigurator;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
19 import org.apache.log4j.Level;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
20 import org.apache.log4j.Logger;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
21 import org.openrdf.model.Resource;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
22 import org.openrdf.model.Statement;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
23 import org.openrdf.model.URI;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
24 import org.openrdf.model.Value;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
25 import org.openrdf.model.impl.LiteralImpl;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
26 import org.openrdf.query.Binding;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
27 import org.openrdf.query.BindingSet;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
28 import org.openrdf.query.MalformedQueryException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
29 import org.openrdf.query.Query;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
30 import org.openrdf.query.QueryEvaluationException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
31 import org.openrdf.query.QueryLanguage;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
32 import org.openrdf.query.TupleQuery;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
33 import org.openrdf.query.TupleQueryResult;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
34 import org.openrdf.repository.RepositoryConnection;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
35 import org.openrdf.repository.RepositoryException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
36 import org.openrdf.repository.RepositoryResult;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
37
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
38 import com.hp.hpl.jena.graph.impl.LiteralLabel;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
39
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
40 import de.mpiwg.itgroup.nimanager.exceptions.TripleStoreHandlerException;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
41 import de.mpiwg.itgroup.nimanager.owl.MetaDataHandler;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
42 import de.mpiwg.itgroup.nimanager.owl.TripleStoreHandler;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
43 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
44 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
45 import edu.stanford.smi.protegex.owl.model.RDFProperty;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
46 import edu.stanford.smi.protegex.owl.model.RDFSLiteral;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
47
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
48 public class ImportGND {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
49
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
50 private MetaDataHandler mh;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
51 private TripleStoreHandler th;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
52 private Logger logger = Logger.getRootLogger();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
53 // private Connection con;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
54 private String offset;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
55 private String limit;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
56 private FileWriter dbpediaMissing;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
57
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
58 private String mpiwgPerson = "file://mpiwg_persons_2.rdf";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
59
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
/**
 * Creates an importer bound to the given ontology and triple store handlers.
 *
 * @param mh     handler used to generate ontology individuals
 * @param th     handler used to query and write the triple store
 * @param offset value appended after {@code OFFSET} in the SPARQL query that
 *               selects the GND entries
 * @param limit  value appended after {@code LIMIT} in the same query
 * @throws SQLException           declared for callers; nothing in this
 *                                constructor body throws it
 * @throws ClassNotFoundException declared for callers; nothing in this
 *                                constructor body throws it
 */
public ImportGND(MetaDataHandler mh, TripleStoreHandler th, String offset,
        String limit) throws SQLException, ClassNotFoundException {
    // Paging window of the GND selection query.
    this.offset = offset;
    this.limit = limit;

    // Collaborators.
    this.th = th;
    this.mh = mh;
}
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
68
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
69 private void createMPIWGFromGNDIdentifiers(String predicate,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
70 HashMap<String, String> mapping, String inCtx, String outCtx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
71 throws RepositoryException, MalformedQueryException,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
72 QueryEvaluationException, URISyntaxException,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
73 TripleStoreHandlerException, SQLException, IOException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
74 createMPIWGFromGNDIdentifiers(predicate, mapping,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
75 new HashMap<String, String>(), inCtx, outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
76 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
77
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
78 // benutze predicate zur identifizierung der gnd eintrage, diese haben keine
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
79 // rdfs:type attribute, daher nehme ich alle eintraege die ein bestimmtes
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
80 // attribute hhaben
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
81 private void createMPIWGFromGNDIdentifiers(String predicate,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
82 HashMap<String, String> mapping,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
83 HashMap<String, String> complexMapping, String inCtx, String outCtx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
84 throws RepositoryException, MalformedQueryException,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
85 QueryEvaluationException, URISyntaxException,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
86 TripleStoreHandlerException, SQLException, IOException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
87
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
88 dbpediaMissing = new FileWriter("/tmp/missingDBPedia.txt");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
89 int newPersonID = 0;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
90 int namedEntityIdentifierID = 0;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
91 int namedEntityIdentifierCreationID = 0;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
92
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
93 // List<String> classes = mh.getEquivalentClasses(clsName); // suche
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
94 // alle
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
95 // aequivalenten
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
96 // Klassen
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
97 // classes.add(clsName); // add the classname it self;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
98
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
99 OWLIndividual providerMPIWG = th.getProvider(mh,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
100 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
101
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
102 if (providerMPIWG == null) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
103 providerMPIWG = createMPIWFProvider(outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
104 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
105
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
106 OWLIndividual providerDbPedia = th.getProvider(mh,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
107 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
108
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
109 if (providerDbPedia == null) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
110 providerDbPedia = createDbPediaProvider(outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
111 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
112
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
113 OWLIndividual providerDNB = th.getProvider(mh,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
114 TripleStoreHandler.ONTOLOGY_NS + "provider_DNB");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
115
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
116 if (providerDNB == null) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
117 providerDNB = createDNBProvider(outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
118 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
119
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
120 RepositoryConnection con = th.getRepository().getConnection();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
121 // find all object
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
122 String queryString = "SELECT DISTINCT ?s FROM <" + inCtx
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
123 + "> WHERE {?s <" + predicate + ">" + "?o .} OFFSET " + offset
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
124 + " LIMIT " + limit;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
125
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
126 TupleQueryResult result = th.querySPARQL(queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
127
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
128 while (result.hasNext()) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
129 BindingSet bindingSet = result.next();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
130 Value subjValue = bindingSet.getValue("s");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
131 URI gndPerson = th.getRepository().getValueFactory()
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
132 .createURI(subjValue.stringValue());
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
133
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
134 // check if dbpedia link exists
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
135 boolean createNew = false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
136 URI dbpedia = getDBPediaLink(gndPerson, inCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
137 if (dbpedia != null) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
138 // gibt es den entsprechenden Eintrag schon, dann hole die
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
139 // entprechende person
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
140 URI person = getPersonFromDBPedia(dbpedia);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
141
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
142 if (person != null) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
143 OWLIndividual personInd = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
144 TripleStoreHandler.ONTOLOGY_NS + "Person",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
145 person.toString());
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
146 addGNDToMPIWGIdentifier(gndPerson, personInd,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
147 namedEntityIdentifierID, outCtx, providerDNB,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
148 namedEntityIdentifierCreationID);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
149 } else {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
150 dbpediaMissing.write(subjValue.stringValue());
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
151 createNew = true;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
152 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
153 } else {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
154 createNew = true;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
155 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
156
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
157 // zur GND gibt es noch keinen personen eintrag related zur DB
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
158 if (createNew) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
159
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
160 HashMap<String, Object> newValues = new HashMap<String, Object>();// werte
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
161 // die
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
162 // neu
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
163 // eingetragen
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
164 // werden
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
165 // muessen
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
166 for (String key : mapping.keySet()) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
167
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
168 RepositoryResult<Statement> namesStatements = con
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
169 .getStatements(gndPerson, th.getRepository()
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
170 .getValueFactory().createURI(key), null,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
171 false);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
172
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
173 while (namesStatements.hasNext()) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
174 Statement stmt = namesStatements.next();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
175 Object newValue;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
176 Value val = stmt.getObject();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
177
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
178 // sollte literal sein
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
179 if (LiteralImpl.class.isInstance(val)) { // wenn ein
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
180 // string
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
181 // literal,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
182 // dann
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
183 // uebersetze
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
184 // in jena
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
185 // string
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
186 // literal
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
187 LiteralImpl li = (LiteralImpl) val;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
188
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
189 newValue = mh.getOwlModel()
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
190 .createRDFSLiteralOrString(li.getLabel(),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
191 li.getLanguage());
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
192 newValues.put(mapping.get(key), newValue);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
193 break;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
194 // nicht mehr weiter suche, nimm also immer den
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
195 // ersten literal
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
196 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
197
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
198 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
199 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
200
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
201 for (String key : complexMapping.keySet()) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
202
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
203 String cmd = "select ?o from <" + inCtx + "> where {"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
204 + String.format(key, gndPerson.stringValue()) + "}";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
205 TupleQueryResult results = th.querySPARQL(cmd);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
206
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
207 if (results.hasNext()) { // nimm nur das erste
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
208 BindingSet firstStatement = results.next();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
209
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
210 Object newValue;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
211 Value val = firstStatement.getBinding("o").getValue();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
212
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
213 if (LiteralImpl.class.isInstance(val)) { // wenn ein
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
214 // string
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
215 // literal,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
216 // dann
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
217 // uebersetze
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
218 // in jena
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
219 // string
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
220 // literal
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
221 LiteralImpl li = (LiteralImpl) val;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
222
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
223 newValue = mh.getOwlModel()
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
224 .createRDFSLiteralOrString(li.getLabel(),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
225 li.getLanguage());
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
226 } else { // anderfalls dern string wert = uri
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
227 newValue = val.stringValue();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
228
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
229 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
230
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
231 newValues.put(complexMapping.get(key), newValue);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
232 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
233 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
234
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
235 // first create the new person
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
236
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
237 Boolean ex = checkExistance(newValues, th, mapping.values(),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
238 outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
239
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
240 if (ex) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
241 logger.info("nothing to be done!");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
242 continue;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
243 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
244 newPersonID = getNewId(newPersonID, "Person", new String[] {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
245 outCtx, mpiwgPerson });
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
246 logger.info("New ID choosen:" + String.valueOf(newPersonID));
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
247 OWLIndividual person = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
248 TripleStoreHandler.ONTOLOGY_NS + "Person",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
249 TripleStoreHandler.ONTOLOGY_NS + "Person:"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
250 + String.valueOf(newPersonID), newValues);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
251 mh.printIndividual(person);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
252
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
253 newPersonID += 1;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
254 // Boolean ex = checkExistance(person, th, mapping.values(),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
255 // outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
256 // if (ex) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
257 // logger.info("nothing to be done!");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
258 // person.delete();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
259 // continue;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
260 // }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
261 th.write(person, outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
262
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
263 // now we create the MPIWG identifier and connect it to the
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
264 // person
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
265 HashMap<String, Object> idValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
266 idValues.put(TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
267 + "is_preferred_namedEntityIdentifier", person);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
268 idValues.put(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
269 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
270 "Person:" + String.valueOf(newPersonID)); // TODO PID
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
271 // GENERATOR
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
272
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
273 namedEntityIdentifierID = getNewId(namedEntityIdentifierID,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
274 "NamedEntityIdentifier", new String[] { outCtx,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
275 mpiwgPerson });
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
276 OWLIndividual mpiwgIdentifier = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
277 TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
278 + "NamedEntityIdentifier",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
279 TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
280 + "NamedEntityIdentifier:"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
281 + String.valueOf(namedEntityIdentifierID),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
282 idValues);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
283
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
284 namedEntityIdentifierID += 1;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
285 th.write(mpiwgIdentifier, outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
286
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
287 // now create the creation object and connect it to the MPIWG
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
288 // identifier and the provider MPIWG
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
289 HashMap<String, Object> creationValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
290 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
291 + "carriedOutByNamedEntityProvider", providerMPIWG);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
292 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
293 + "created_NamedEntityIdentifier", mpiwgIdentifier);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
294
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
295 namedEntityIdentifierCreationID = getNewId(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
296 namedEntityIdentifierCreationID,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
297 "NamedEntityIdentifierCreation", new String[] { outCtx,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
298 mpiwgPerson });
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
299
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
300 OWLIndividual creation = mh
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
301 .generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
302 TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
303 + "NamedEntityIdentifierCreation",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
304 TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
305 + "NamedEntityIdentifierCreation:"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
306 + String.valueOf(namedEntityIdentifierCreationID),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
307 creationValues);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
308 namedEntityIdentifierCreationID += 1;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
309
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
310 th.write(creation, outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
311
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
312 addGNDToMPIWGIdentifier(gndPerson, person,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
313 namedEntityIdentifierID, outCtx, providerDNB,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
314 namedEntityIdentifierCreationID);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
315 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
316 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
317
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
318 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
319
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
320 private void addGNDToMPIWGIdentifier(URI gndPerson, OWLIndividual person,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
321 int namedEntityIdentifierID, String outCtx, Object gndProvider,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
322 int namedEntityIdentifierCreationID) throws SQLException,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
323 RepositoryException, TripleStoreHandlerException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
324 // now create the gnd identifier and connect it to the
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
325 // person
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
326
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
327 HashMap<String, Object> db_idValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
328 db_idValues.put(TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
329 + "identifies_NamedEntity", person);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
330
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
331 // identifier is the url at dng
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
332 db_idValues.put(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
333 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
334 gndPerson.stringValue());
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
335
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
336 namedEntityIdentifierID = getNewId(namedEntityIdentifierID,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
337 "NamedEntityIdentifier", new String[] { outCtx, mpiwgPerson });
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
338
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
339 OWLIndividual dbIdentifier = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
340 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
341 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
342 + String.valueOf(namedEntityIdentifierID), db_idValues);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
343
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
344 namedEntityIdentifierID += 1;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
345
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
346 th.write(dbIdentifier, outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
347
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
348 // now create the creation object and connect it to the gnd
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
349 // identifier and the provider gnd
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
350 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
351 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
352 + "carriedOutByNamedEntityProvider", gndProvider);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
353 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
354 + "created_NamedEntityIdentifier", dbIdentifier);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
355
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
356 namedEntityIdentifierCreationID = getNewId(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
357 namedEntityIdentifierCreationID,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
358 "NamedEntityIdentifierCreation", new String[] { outCtx,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
359 mpiwgPerson });
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
360
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
361 OWLIndividual dbcreation = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
362 TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
363 + "NamedEntityIdentifierCreation",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
364 TripleStoreHandler.ONTOLOGY_NS
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
365 + "NamedEntityIdentifierCreation:"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
366 + String.valueOf(namedEntityIdentifierCreationID),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
367 db_creationValues);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
368 namedEntityIdentifierCreationID += 1;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
369
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
370 th.write(dbcreation, outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
371
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
372 // add the bbpedia identifier to the triple store
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
373 th.write(gndPerson.stringValue(),
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
374 "http://erlangen-crm.org/110404/P1_is_identified_by",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
375 dbIdentifier.getURI(), outCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
376 ;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
377
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
378 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
379
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
380 private URI getPersonFromDBPedia(URI dbpedia) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
381 String query = "select distinct ?x ?y ?person "
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
382 + "from <file://mpiwg_persons.rdf> "
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
383 + "where { ?y <http://ontologies.mpiwg-berlin.mpg.de/authorities/namedIdentities#identifies_NamedEntity> ?person."
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
384 + "<" + dbpedia.stringValue()
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
385 + "> <http://erlangen-crm.org/110404/P1_is_identified_by> ?y.}";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
386
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
387 try {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
388 TupleQueryResult results = th.querySPARQL(query);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
389 while (results.hasNext()) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
390 BindingSet stm = results.next();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
391 Binding person = stm.getBinding("person");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
392 return (URI) person.getValue();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
393 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
394 } catch (MalformedQueryException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
395 // TODO Auto-generated catch block
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
396 e.printStackTrace();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
397 } catch (QueryEvaluationException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
398 // TODO Auto-generated catch block
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
399 e.printStackTrace();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
400 } catch (TripleStoreHandlerException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
401 // TODO Auto-generated catch block
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
402 e.printStackTrace();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
403 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
404 return null;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
405 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
406
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
407 private URI getDBPediaLink(URI subj, String inCtx) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
408 try {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
409 RepositoryResult<Statement> statements = th.getStatements(subj,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
410 th.createUri("http://www.w3.org/2002/07/owl#sameAs"), null,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
411 inCtx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
412 while (statements.hasNext()) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
413 Statement smt = statements.next();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
414 URI obj = (URI) smt.getObject();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
415 if (obj.getNamespace().equals("http://dbpedia.org/resource/")) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
416 return obj;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
417 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
418 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
419 return null;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
420 } catch (RepositoryException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
421 // TODO Auto-generated catch block
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
422 e.printStackTrace();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
423 return null;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
424 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
425 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
426
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
427 private Boolean checkExistance(HashMap<String, Object> newValues,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
428 TripleStoreHandler th2, Collection<String> props, String outCtx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
429 throws RepositoryException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
430
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
431 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
432 for (String propString : props) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
433 // RDFProperty rdfProp = model.getRDFProperty(propString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
434 String str = "";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
435 String lang = "";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
436 Object valObj = newValues.get(propString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
437 if (RDFSLiteral.class.isInstance(valObj)) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
438 RDFSLiteral val = (RDFSLiteral) valObj;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
439 if (val == null)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
440 continue;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
441 lang = val.getLanguage();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
442 str = val.getString();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
443 str = str.replace("\"", "\\\"");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
444 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
445 + ".";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
446 } else {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
447 str = (String) valObj;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
448 str = str.replace("\"", "\\\"");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
449 queryString += "?x <" + propString + "> \"" + str + "\""
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
450 + ".";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
451 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
452
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
453
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
454 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
455
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
456 queryString += " }";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
457
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
458 TupleQueryResult result;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
459 try {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
460 result = th.querySPARQL(queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
461 } catch (MalformedQueryException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
462 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
463 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
464 } catch (QueryEvaluationException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
465 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
466 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
467 } catch (TripleStoreHandlerException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
468 e.printStackTrace();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
469 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
470 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
471 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
472 try {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
473 if (result.hasNext())
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
474 return true;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
475 else
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
476 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
477 } catch (QueryEvaluationException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
478 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
479 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
480 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
481
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
482 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
483
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
484 private int getNewId(int startnumber, String identifier, String[] ctx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
485 throws SQLException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
486
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
487 java.sql.Statement smt = th.sqlCon.createStatement();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
488 Boolean exists = true;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
489
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
490 while (exists) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
491 String fromString = "";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
492 for (int i = 0; i < ctx.length; i++) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
493 fromString += String.format(" from <%s> ", ctx[i]);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
494 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
495 String cmdString = String.format(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
496 "sparql select count(*) %s where {<%s> ?x ?y}",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
497 fromString,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
498 TripleStoreHandler.ONTOLOGY_NS + identifier + ":"
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
499 + String.valueOf(startnumber));
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
500 smt.execute(cmdString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
501 ResultSet rs = smt.getResultSet();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
502 rs.next();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
503 int count = rs.getInt(1);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
504 if (count > 0) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
505 startnumber += 1;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
506 } else {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
507 exists = false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
508 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
509 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
510
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
511 return startnumber;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
512 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
513
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
514 private Boolean checkExistance(OWLIndividual person,
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
515 TripleStoreHandler th2, Collection<String> props, String outCtx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
516 throws RepositoryException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
517 JenaOWLModel model = mh.getOwlModel();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
518 // Map<String, String> vals = new HashMap<String, String>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
519
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
520 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
521 for (String propString : props) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
522 RDFProperty rdfProp = model.getRDFProperty(propString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
523 RDFSLiteral val = (RDFSLiteral) person.getPropertyValue(rdfProp);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
524 if (val == null)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
525 continue;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
526 String lang = val.getLanguage();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
527 String str = val.getString();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
528 str = str.replace("\"", "\\\"");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
529 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
530 + ".";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
531 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
532
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
533 queryString += " }";
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
534 TupleQueryResult result;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
535 try {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
536 result = th.querySPARQL(queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
537 } catch (MalformedQueryException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
538 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
539 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
540 } catch (QueryEvaluationException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
541 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
542 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
543 } catch (TripleStoreHandlerException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
544 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
545 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
546 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
547 try {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
548 if (result.hasNext())
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
549 return true;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
550 else
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
551 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
552 } catch (QueryEvaluationException e) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
553 logger.error("Query String cannot be handled:" + queryString);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
554 return false;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
555 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
556
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
557 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
558
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
559 private OWLIndividual createDbPediaProvider(String ctx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
560 throws RepositoryException, TripleStoreHandlerException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
561 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
562
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
563 OWLIndividual dbcreation = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
564 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
565 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
566
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
567 th.write(dbcreation, ctx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
568
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
569 return dbcreation;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
570
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
571 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
572
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
573 private OWLIndividual createDNBProvider(String ctx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
574 throws RepositoryException, TripleStoreHandlerException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
575 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
576
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
577 OWLIndividual dbcreation = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
578 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
579 TripleStoreHandler.ONTOLOGY_NS + "provider_DNB");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
580
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
581 th.write(dbcreation, ctx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
582
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
583 return dbcreation;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
584
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
585 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
586
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
587 private OWLIndividual createMPIWFProvider(String ctx)
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
588 throws RepositoryException, TripleStoreHandlerException {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
589 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
590
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
591 OWLIndividual dbcreation = mh.generateEntity(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
592 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
593 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
594
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
595 th.write(dbcreation, ctx);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
596
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
597 return dbcreation;
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
598
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
599 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
600
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
601 public static void main(String args[]) throws Exception {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
602 if (args.length < 4) {
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
603 System.out.println("usage: import user pw offset limit ");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
604 System.exit(1);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
605 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
606
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
607 Logger.getRootLogger().setLevel(Level.INFO);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
608 BasicConfigurator.configure();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
609
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
610 MetaDataHandler mh = new MetaDataHandler();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
611
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
612 // TripleStoreHandler th = new TripleStoreHandler(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
613 // "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111",args[0],
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
614 // args[1]);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
615
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
616 TripleStoreHandler th = new TripleStoreHandler(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
617 "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111", args[0], args[1]);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
618 System.out.println(mh);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
619 System.out.println(th);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
620
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
621 ImportGND imp = new ImportGND(mh, th, args[2], args[3]);
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
622
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
623 HashMap<String, String> mapping = new HashMap<String, String>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
624 HashMap<String, String> complexMapping = new HashMap<String, String>();
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
625
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
626 // SELECT DISTINCT *
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
627 // FROM <file://mpiwg_persons.rdf>
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
628 // FROM <file:///GND.rdf>
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
629 // WHERE { ?p <http://d-nb.info/gnd/foreName> ?o.
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
630 // <http://d-nb.info/gnd/100004776>
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
631 // <http://d-nb.info/gnd/preferredNameForThePerson> ?p }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
632
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
633 // SELECT DISTINCT *
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
634 // FROM <file://mpiwg_persons.rdf>
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
635 // FROM <file:///GND.rdf>
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
636 // WHERE { ?o <http://d-nb.info/gnd/surname> ?o2.
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
637 // <http://d-nb.info/gnd/100004776>
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
638 // <http://d-nb.info/gnd/preferredNameForThePerson> ?o }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
639
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
640 complexMapping.put("?p <http://d-nb.info/gnd/surname> ?o."
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
641 + "<%s> <http://d-nb.info/gnd/preferredNameForThePerson> ?p ",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
642 "http://xmlns.com/foaf/0.1/lastName");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
643 complexMapping.put("?p <http://d-nb.info/gnd/foreName> ?o."
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
644 + "<%s> <http://d-nb.info/gnd/preferredNameForThePerson> ?p ",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
645 "http://xmlns.com/foaf/0.1/firstName");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
646 mapping.put("http://d-nb.info/gnd/preferredNameForThePerson",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
647 "http://xmlns.com/foaf/0.1/name");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
648
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
649 imp.createMPIWGFromGNDIdentifiers(
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
650 "http://RDVocab.info/ElementsGr2/identifierForThePerson",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
651 mapping, complexMapping, "file:///GND.rdf",
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
652 "file://mpiwg_persons_dnb.rdf");
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
653
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
654 // mh.getOwlModel().save(new java.net.URI("file:///tmp/prot.owl"));
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
655 }
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
656
e3ecb88314a5 minor bugs
dwinter
parents:
diff changeset
657 }