annotate src/de/mpiwg/itgroup/nimanager/importer/Import.java @ 4:f986e74583eb

removed triplestorehandler componentes
author dwinter
date Tue, 13 Dec 2011 17:46:51 +0100
parents e3ecb88314a5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1384a0d382fa first input
dwinter
parents:
diff changeset
1 package de.mpiwg.itgroup.nimanager.importer;
1384a0d382fa first input
dwinter
parents:
diff changeset
2
1384a0d382fa first input
dwinter
parents:
diff changeset
3 import java.net.URISyntaxException;
1384a0d382fa first input
dwinter
parents:
diff changeset
4 import java.net.URL;
1384a0d382fa first input
dwinter
parents:
diff changeset
5 import java.sql.Connection;
1384a0d382fa first input
dwinter
parents:
diff changeset
6 import java.sql.DriverManager;
1384a0d382fa first input
dwinter
parents:
diff changeset
7 import java.sql.ResultSet;
1384a0d382fa first input
dwinter
parents:
diff changeset
8 import java.sql.SQLException;
1384a0d382fa first input
dwinter
parents:
diff changeset
9 import java.util.Collection;
1384a0d382fa first input
dwinter
parents:
diff changeset
10 import java.util.HashMap;
1384a0d382fa first input
dwinter
parents:
diff changeset
11 import java.util.List;
1384a0d382fa first input
dwinter
parents:
diff changeset
12 import java.util.Map;
1384a0d382fa first input
dwinter
parents:
diff changeset
13
1384a0d382fa first input
dwinter
parents:
diff changeset
14 import org.apache.log4j.BasicConfigurator;
1384a0d382fa first input
dwinter
parents:
diff changeset
15 import org.apache.log4j.Level;
1384a0d382fa first input
dwinter
parents:
diff changeset
16 import org.apache.log4j.Logger;
1384a0d382fa first input
dwinter
parents:
diff changeset
17 import org.openrdf.model.Resource;
1384a0d382fa first input
dwinter
parents:
diff changeset
18 import org.openrdf.model.Statement;
1384a0d382fa first input
dwinter
parents:
diff changeset
19 import org.openrdf.model.URI;
1384a0d382fa first input
dwinter
parents:
diff changeset
20 import org.openrdf.model.Value;
1384a0d382fa first input
dwinter
parents:
diff changeset
21 import org.openrdf.model.impl.LiteralImpl;
1384a0d382fa first input
dwinter
parents:
diff changeset
22 import org.openrdf.query.BindingSet;
1384a0d382fa first input
dwinter
parents:
diff changeset
23 import org.openrdf.query.MalformedQueryException;
1384a0d382fa first input
dwinter
parents:
diff changeset
24 import org.openrdf.query.QueryEvaluationException;
1384a0d382fa first input
dwinter
parents:
diff changeset
25 import org.openrdf.query.QueryLanguage;
1384a0d382fa first input
dwinter
parents:
diff changeset
26 import org.openrdf.query.TupleQuery;
1384a0d382fa first input
dwinter
parents:
diff changeset
27 import org.openrdf.query.TupleQueryResult;
1384a0d382fa first input
dwinter
parents:
diff changeset
28 import org.openrdf.repository.RepositoryConnection;
1384a0d382fa first input
dwinter
parents:
diff changeset
29 import org.openrdf.repository.RepositoryException;
1384a0d382fa first input
dwinter
parents:
diff changeset
30 import org.openrdf.repository.RepositoryResult;
1384a0d382fa first input
dwinter
parents:
diff changeset
31
1384a0d382fa first input
dwinter
parents:
diff changeset
32 import com.hp.hpl.jena.graph.impl.LiteralLabel;
1384a0d382fa first input
dwinter
parents:
diff changeset
33
4
f986e74583eb removed triplestorehandler componentes
dwinter
parents: 2
diff changeset
34 import de.mpiwg.itgroup.triplestoremanager.exceptions.TripleStoreHandlerException;
f986e74583eb removed triplestorehandler componentes
dwinter
parents: 2
diff changeset
35 import de.mpiwg.itgroup.triplestoremanager.owl.MetaDataHandler;
f986e74583eb removed triplestorehandler componentes
dwinter
parents: 2
diff changeset
36 import de.mpiwg.itgroup.triplestoremanager.owl.TripleStoreHandler;
f986e74583eb removed triplestorehandler componentes
dwinter
parents: 2
diff changeset
37
f986e74583eb removed triplestorehandler componentes
dwinter
parents: 2
diff changeset
38
0
1384a0d382fa first input
dwinter
parents:
diff changeset
39 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
1384a0d382fa first input
dwinter
parents:
diff changeset
40 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
1384a0d382fa first input
dwinter
parents:
diff changeset
41 import edu.stanford.smi.protegex.owl.model.RDFProperty;
1384a0d382fa first input
dwinter
parents:
diff changeset
42 import edu.stanford.smi.protegex.owl.model.RDFSLiteral;
1384a0d382fa first input
dwinter
parents:
diff changeset
43
1384a0d382fa first input
dwinter
parents:
diff changeset
44 public class Import {
1384a0d382fa first input
dwinter
parents:
diff changeset
45
1384a0d382fa first input
dwinter
parents:
diff changeset
46 private MetaDataHandler mh;
1384a0d382fa first input
dwinter
parents:
diff changeset
47 private TripleStoreHandler th;
1384a0d382fa first input
dwinter
parents:
diff changeset
48 private Logger logger = Logger.getRootLogger();
1384a0d382fa first input
dwinter
parents:
diff changeset
49 // private Connection con;
1384a0d382fa first input
dwinter
parents:
diff changeset
50 private String offset;
1384a0d382fa first input
dwinter
parents:
diff changeset
51 private String limit;
1384a0d382fa first input
dwinter
parents:
diff changeset
52
1384a0d382fa first input
dwinter
parents:
diff changeset
/**
 * Creates an importer that reads from and writes to the given triple store.
 *
 * @param mh     handler for the ontology model used to build new individuals
 * @param th     handler for the triple store that is queried and written to
 * @param offset value placed after {@code OFFSET} in the selection query
 * @param limit  value placed after {@code LIMIT} in the selection query
 * @throws SQLException           declared for callers; not thrown by this body
 * @throws ClassNotFoundException declared for callers; not thrown by this body
 */
public Import(MetaDataHandler mh, TripleStoreHandler th, String offset,
        String limit) throws SQLException, ClassNotFoundException {
    // paging window of the source query
    this.limit = limit;
    this.offset = offset;
    // collaborators
    this.th = th;
    this.mh = mh;
}
1384a0d382fa first input
dwinter
parents:
diff changeset
61
1384a0d382fa first input
dwinter
parents:
diff changeset
62 private void createMPIWGIdentifiers(String clsName,
1384a0d382fa first input
dwinter
parents:
diff changeset
63 HashMap<String, String> mapping, String inCtx, String outCtx)
1384a0d382fa first input
dwinter
parents:
diff changeset
64 throws RepositoryException, MalformedQueryException,
1384a0d382fa first input
dwinter
parents:
diff changeset
65 QueryEvaluationException, URISyntaxException,
1384a0d382fa first input
dwinter
parents:
diff changeset
66 TripleStoreHandlerException, SQLException {
1384a0d382fa first input
dwinter
parents:
diff changeset
67
1384a0d382fa first input
dwinter
parents:
diff changeset
68 int newPersonID = 0;
1384a0d382fa first input
dwinter
parents:
diff changeset
69 int namedEntityIdentifierID= 0;
1384a0d382fa first input
dwinter
parents:
diff changeset
70 int namedEntityIdentifierCreationID= 0;
1384a0d382fa first input
dwinter
parents:
diff changeset
71
1384a0d382fa first input
dwinter
parents:
diff changeset
72 List<String> classes = mh.getEquivalentClasses(clsName); // suche alle
1384a0d382fa first input
dwinter
parents:
diff changeset
73 // aequivalenten
1384a0d382fa first input
dwinter
parents:
diff changeset
74 // Klassen
1384a0d382fa first input
dwinter
parents:
diff changeset
75 classes.add(clsName); // add the classname it self;
2
e3ecb88314a5 minor bugs
dwinter
parents: 1
diff changeset
76
0
1384a0d382fa first input
dwinter
parents:
diff changeset
77 OWLIndividual providerMPIWG = th.getProvider(mh,
1384a0d382fa first input
dwinter
parents:
diff changeset
78 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
1384a0d382fa first input
dwinter
parents:
diff changeset
79
1384a0d382fa first input
dwinter
parents:
diff changeset
80 if (providerMPIWG == null) {
1384a0d382fa first input
dwinter
parents:
diff changeset
81 providerMPIWG = createMPIWFProvider(outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
82 }
1384a0d382fa first input
dwinter
parents:
diff changeset
83
1384a0d382fa first input
dwinter
parents:
diff changeset
84 OWLIndividual providerDbPedia = th.getProvider(mh,
1384a0d382fa first input
dwinter
parents:
diff changeset
85 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
1384a0d382fa first input
dwinter
parents:
diff changeset
86
1384a0d382fa first input
dwinter
parents:
diff changeset
87 if (providerDbPedia == null) {
1384a0d382fa first input
dwinter
parents:
diff changeset
88 providerDbPedia = createDbPediaProvider(outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
89 }
1384a0d382fa first input
dwinter
parents:
diff changeset
90 for (String cl : classes) { // gehe durch die klassen
1384a0d382fa first input
dwinter
parents:
diff changeset
91 RepositoryConnection con = th.getRepository().getConnection();
2
e3ecb88314a5 minor bugs
dwinter
parents: 1
diff changeset
92
0
1384a0d382fa first input
dwinter
parents:
diff changeset
93 String queryString = "SELECT DISTINCT ?s FROM <"
1384a0d382fa first input
dwinter
parents:
diff changeset
94 + inCtx
1384a0d382fa first input
dwinter
parents:
diff changeset
95 + "> WHERE {?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <"
1384a0d382fa first input
dwinter
parents:
diff changeset
96 + cl + "> .} OFFSET " + offset + " LIMIT " + limit;
2
e3ecb88314a5 minor bugs
dwinter
parents: 1
diff changeset
97
0
1384a0d382fa first input
dwinter
parents:
diff changeset
98 TupleQueryResult result = th.querySPARQL(queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
99
1384a0d382fa first input
dwinter
parents:
diff changeset
100
1384a0d382fa first input
dwinter
parents:
diff changeset
101 while (result.hasNext()) {
1384a0d382fa first input
dwinter
parents:
diff changeset
102 BindingSet bindingSet = result.next();
1384a0d382fa first input
dwinter
parents:
diff changeset
103 Value subjValue = bindingSet.getValue("s");
1384a0d382fa first input
dwinter
parents:
diff changeset
104 URI subj = th.getRepository().getValueFactory()
1384a0d382fa first input
dwinter
parents:
diff changeset
105 .createURI(subjValue.stringValue());
1384a0d382fa first input
dwinter
parents:
diff changeset
106
1384a0d382fa first input
dwinter
parents:
diff changeset
107 HashMap<String, Object> newValues = new HashMap<String, Object>();// werte
1384a0d382fa first input
dwinter
parents:
diff changeset
108 // die
1384a0d382fa first input
dwinter
parents:
diff changeset
109 // neu
1384a0d382fa first input
dwinter
parents:
diff changeset
110 // eingetragen
1384a0d382fa first input
dwinter
parents:
diff changeset
111 // werden
1384a0d382fa first input
dwinter
parents:
diff changeset
112 // muessen
1384a0d382fa first input
dwinter
parents:
diff changeset
113 for (String key : mapping.keySet()) {
1384a0d382fa first input
dwinter
parents:
diff changeset
114
1384a0d382fa first input
dwinter
parents:
diff changeset
115 RepositoryResult<Statement> namesStatements = con
1384a0d382fa first input
dwinter
parents:
diff changeset
116 .getStatements(subj, th.getRepository()
1384a0d382fa first input
dwinter
parents:
diff changeset
117 .getValueFactory().createURI(key), null,
1384a0d382fa first input
dwinter
parents:
diff changeset
118 false);
1384a0d382fa first input
dwinter
parents:
diff changeset
119 Statement firstStatement = TripleStoreHandler
1384a0d382fa first input
dwinter
parents:
diff changeset
120 .getFirstStatement(namesStatements);
1384a0d382fa first input
dwinter
parents:
diff changeset
121
1384a0d382fa first input
dwinter
parents:
diff changeset
122 if (firstStatement != null) {
1384a0d382fa first input
dwinter
parents:
diff changeset
123 Object newValue;
1384a0d382fa first input
dwinter
parents:
diff changeset
124 Value val = firstStatement.getObject();
1384a0d382fa first input
dwinter
parents:
diff changeset
125
1384a0d382fa first input
dwinter
parents:
diff changeset
126 if (LiteralImpl.class.isInstance(val)) { // wenn ein
1384a0d382fa first input
dwinter
parents:
diff changeset
127 // string
1384a0d382fa first input
dwinter
parents:
diff changeset
128 // literal,
1384a0d382fa first input
dwinter
parents:
diff changeset
129 // dann
1384a0d382fa first input
dwinter
parents:
diff changeset
130 // uebersetze
1384a0d382fa first input
dwinter
parents:
diff changeset
131 // in jena
1384a0d382fa first input
dwinter
parents:
diff changeset
132 // string
1384a0d382fa first input
dwinter
parents:
diff changeset
133 // literal
1384a0d382fa first input
dwinter
parents:
diff changeset
134 LiteralImpl li = (LiteralImpl) val;
1384a0d382fa first input
dwinter
parents:
diff changeset
135
1384a0d382fa first input
dwinter
parents:
diff changeset
136 newValue = mh.getOwlModel()
1384a0d382fa first input
dwinter
parents:
diff changeset
137 .createRDFSLiteralOrString(li.getLabel(),
1384a0d382fa first input
dwinter
parents:
diff changeset
138 li.getLanguage());
1384a0d382fa first input
dwinter
parents:
diff changeset
139 } else { // anderfalls dern string wert = uri
1384a0d382fa first input
dwinter
parents:
diff changeset
140 newValue = val.stringValue();
1384a0d382fa first input
dwinter
parents:
diff changeset
141
1384a0d382fa first input
dwinter
parents:
diff changeset
142 }
1384a0d382fa first input
dwinter
parents:
diff changeset
143
1384a0d382fa first input
dwinter
parents:
diff changeset
144 newValues.put(mapping.get(key), newValue);
1384a0d382fa first input
dwinter
parents:
diff changeset
145 }
1384a0d382fa first input
dwinter
parents:
diff changeset
146 }
1384a0d382fa first input
dwinter
parents:
diff changeset
147
1384a0d382fa first input
dwinter
parents:
diff changeset
148 // first create the new person
1384a0d382fa first input
dwinter
parents:
diff changeset
149
1384a0d382fa first input
dwinter
parents:
diff changeset
150
1384a0d382fa first input
dwinter
parents:
diff changeset
151 Boolean ex = checkExistance(newValues,th, mapping.values(),
1384a0d382fa first input
dwinter
parents:
diff changeset
152 outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
153
1384a0d382fa first input
dwinter
parents:
diff changeset
154 if (ex) {
1384a0d382fa first input
dwinter
parents:
diff changeset
155 logger.info("nothing to be done!");
1384a0d382fa first input
dwinter
parents:
diff changeset
156 continue;
1384a0d382fa first input
dwinter
parents:
diff changeset
157 }
1384a0d382fa first input
dwinter
parents:
diff changeset
158 newPersonID = getNewId(newPersonID, "Person", outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
159 logger.info("New ID choosen:" + String.valueOf(newPersonID));
1384a0d382fa first input
dwinter
parents:
diff changeset
160 OWLIndividual person = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
161 TripleStoreHandler.ONTOLOGY_NS + "Person",
1384a0d382fa first input
dwinter
parents:
diff changeset
162 TripleStoreHandler.ONTOLOGY_NS + "Person:"
1384a0d382fa first input
dwinter
parents:
diff changeset
163 + String.valueOf(newPersonID), newValues);
1384a0d382fa first input
dwinter
parents:
diff changeset
164 mh.printIndividual(person);
1384a0d382fa first input
dwinter
parents:
diff changeset
165
1384a0d382fa first input
dwinter
parents:
diff changeset
166 newPersonID += 1;
1384a0d382fa first input
dwinter
parents:
diff changeset
167 // Boolean ex = checkExistance(person, th, mapping.values(),
1384a0d382fa first input
dwinter
parents:
diff changeset
168 // outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
169 // if (ex) {
1384a0d382fa first input
dwinter
parents:
diff changeset
170 // logger.info("nothing to be done!");
1384a0d382fa first input
dwinter
parents:
diff changeset
171 // person.delete();
1384a0d382fa first input
dwinter
parents:
diff changeset
172 // continue;
1384a0d382fa first input
dwinter
parents:
diff changeset
173 // }
1384a0d382fa first input
dwinter
parents:
diff changeset
174 th.write(person, outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
175
1384a0d382fa first input
dwinter
parents:
diff changeset
176 // now we create the MPIWG identifier and connect it to the
1384a0d382fa first input
dwinter
parents:
diff changeset
177 // person
1384a0d382fa first input
dwinter
parents:
diff changeset
178 HashMap<String, Object> idValues = new HashMap<String, Object>();
1384a0d382fa first input
dwinter
parents:
diff changeset
179 idValues.put(TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
180 + "is_preferred_namedEntityIdentifier", person);
1384a0d382fa first input
dwinter
parents:
diff changeset
181 idValues.put(
1384a0d382fa first input
dwinter
parents:
diff changeset
182 "http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
1384a0d382fa first input
dwinter
parents:
diff changeset
183 "Person:" + String.valueOf(newPersonID)); // TODO PID
1384a0d382fa first input
dwinter
parents:
diff changeset
184 // GENERATOR
1384a0d382fa first input
dwinter
parents:
diff changeset
185
1384a0d382fa first input
dwinter
parents:
diff changeset
186 namedEntityIdentifierID = getNewId(namedEntityIdentifierID, "NamedEntityIdentifier", outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
187 OWLIndividual mpiwgIdentifier = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
188 TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
189 + "NamedEntityIdentifier",
1384a0d382fa first input
dwinter
parents:
diff changeset
190 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:"
1384a0d382fa first input
dwinter
parents:
diff changeset
191 + String.valueOf(namedEntityIdentifierID),
1384a0d382fa first input
dwinter
parents:
diff changeset
192 idValues);
1384a0d382fa first input
dwinter
parents:
diff changeset
193
1384a0d382fa first input
dwinter
parents:
diff changeset
194 namedEntityIdentifierID+=1;
1384a0d382fa first input
dwinter
parents:
diff changeset
195 th.write(mpiwgIdentifier, outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
196
1384a0d382fa first input
dwinter
parents:
diff changeset
197 // now create the creation object and connect it to the MPIWG
1384a0d382fa first input
dwinter
parents:
diff changeset
198 // identifier and the provider MPIWG
1384a0d382fa first input
dwinter
parents:
diff changeset
199 HashMap<String, Object> creationValues = new HashMap<String, Object>();
1384a0d382fa first input
dwinter
parents:
diff changeset
200 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
201 + "carriedOutByNamedEntityProvider", providerMPIWG);
1384a0d382fa first input
dwinter
parents:
diff changeset
202 creationValues.put(TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
203 + "created_NamedEntityIdentifier", mpiwgIdentifier);
1384a0d382fa first input
dwinter
parents:
diff changeset
204
1384a0d382fa first input
dwinter
parents:
diff changeset
205
1384a0d382fa first input
dwinter
parents:
diff changeset
206 namedEntityIdentifierCreationID = getNewId(namedEntityIdentifierCreationID, "NamedEntityIdentifierCreation", outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
207
1384a0d382fa first input
dwinter
parents:
diff changeset
208 OWLIndividual creation = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
209 TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
210 + "NamedEntityIdentifierCreation",
1384a0d382fa first input
dwinter
parents:
diff changeset
211 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifierCreation:"
1384a0d382fa first input
dwinter
parents:
diff changeset
212 + String.valueOf(namedEntityIdentifierCreationID),
1384a0d382fa first input
dwinter
parents:
diff changeset
213 creationValues);
1384a0d382fa first input
dwinter
parents:
diff changeset
214 namedEntityIdentifierCreationID+=1;
1384a0d382fa first input
dwinter
parents:
diff changeset
215
1384a0d382fa first input
dwinter
parents:
diff changeset
216 th.write(creation, outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
217
1384a0d382fa first input
dwinter
parents:
diff changeset
218 // now create the wikipedia identifier and connect it to the
1384a0d382fa first input
dwinter
parents:
diff changeset
219 // person
1384a0d382fa first input
dwinter
parents:
diff changeset
220 HashMap<String, Object> db_idValues = new HashMap<String, Object>();
1384a0d382fa first input
dwinter
parents:
diff changeset
221 db_idValues.put(TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
222 + "identifies_NamedEntity", person);
1384a0d382fa first input
dwinter
parents:
diff changeset
223
1384a0d382fa first input
dwinter
parents:
diff changeset
224 // identifier is the url at dbpedia
1384a0d382fa first input
dwinter
parents:
diff changeset
225 db_idValues
1384a0d382fa first input
dwinter
parents:
diff changeset
226 .put("http://erlangen-crm.org/plus/xdt/110404/has_XSD_String",
1384a0d382fa first input
dwinter
parents:
diff changeset
227 subjValue.stringValue());
1384a0d382fa first input
dwinter
parents:
diff changeset
228
1384a0d382fa first input
dwinter
parents:
diff changeset
229 namedEntityIdentifierID = getNewId(namedEntityIdentifierID, "NamedEntityIdentifier", outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
230
1384a0d382fa first input
dwinter
parents:
diff changeset
231 OWLIndividual dbIdentifier = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
232 TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
233 + "NamedEntityIdentifier",
1384a0d382fa first input
dwinter
parents:
diff changeset
234 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifier:"
1384a0d382fa first input
dwinter
parents:
diff changeset
235 + String.valueOf(namedEntityIdentifierID),
1384a0d382fa first input
dwinter
parents:
diff changeset
236 db_idValues);
1384a0d382fa first input
dwinter
parents:
diff changeset
237
1384a0d382fa first input
dwinter
parents:
diff changeset
238 namedEntityIdentifierID+=1;
1384a0d382fa first input
dwinter
parents:
diff changeset
239
1384a0d382fa first input
dwinter
parents:
diff changeset
240 th.write(dbIdentifier, outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
241
1384a0d382fa first input
dwinter
parents:
diff changeset
242 // now create the creation object and connect it to the dbpedia
1384a0d382fa first input
dwinter
parents:
diff changeset
243 // identifier and the provider pdbedia
1384a0d382fa first input
dwinter
parents:
diff changeset
244 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
1384a0d382fa first input
dwinter
parents:
diff changeset
245 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
246 + "carriedOutByNamedEntityProvider", providerDbPedia);
1384a0d382fa first input
dwinter
parents:
diff changeset
247 db_creationValues.put(TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
248 + "created_NamedEntityIdentifier", dbIdentifier);
1384a0d382fa first input
dwinter
parents:
diff changeset
249
1384a0d382fa first input
dwinter
parents:
diff changeset
250
1384a0d382fa first input
dwinter
parents:
diff changeset
251 namedEntityIdentifierCreationID = getNewId(namedEntityIdentifierCreationID, "NamedEntityIdentifierCreation", outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
252
1384a0d382fa first input
dwinter
parents:
diff changeset
253 OWLIndividual dbcreation = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
254 TripleStoreHandler.ONTOLOGY_NS
1384a0d382fa first input
dwinter
parents:
diff changeset
255 + "NamedEntityIdentifierCreation",
1384a0d382fa first input
dwinter
parents:
diff changeset
256 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityIdentifierCreation:"
1384a0d382fa first input
dwinter
parents:
diff changeset
257 + String.valueOf(namedEntityIdentifierCreationID),
1384a0d382fa first input
dwinter
parents:
diff changeset
258 db_creationValues);
1384a0d382fa first input
dwinter
parents:
diff changeset
259 namedEntityIdentifierCreationID+=1;
1384a0d382fa first input
dwinter
parents:
diff changeset
260
1384a0d382fa first input
dwinter
parents:
diff changeset
261
1384a0d382fa first input
dwinter
parents:
diff changeset
262
1384a0d382fa first input
dwinter
parents:
diff changeset
263 th.write(dbcreation, outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
264
1384a0d382fa first input
dwinter
parents:
diff changeset
265 // add the bbpedia identifier to the triple store
1384a0d382fa first input
dwinter
parents:
diff changeset
266 th.write(subj.stringValue(),
1384a0d382fa first input
dwinter
parents:
diff changeset
267 "http://erlangen-crm.org/110404/P1_is_identified_by",
1384a0d382fa first input
dwinter
parents:
diff changeset
268 dbIdentifier.getURI(), outCtx);
1384a0d382fa first input
dwinter
parents:
diff changeset
269 ;
1384a0d382fa first input
dwinter
parents:
diff changeset
270
1384a0d382fa first input
dwinter
parents:
diff changeset
271 }
1384a0d382fa first input
dwinter
parents:
diff changeset
272 }
1384a0d382fa first input
dwinter
parents:
diff changeset
273
1384a0d382fa first input
dwinter
parents:
diff changeset
274 }
1384a0d382fa first input
dwinter
parents:
diff changeset
275
1384a0d382fa first input
dwinter
parents:
diff changeset
276 private Boolean checkExistance(HashMap<String, Object> newValues,
1384a0d382fa first input
dwinter
parents:
diff changeset
277 TripleStoreHandler th2, Collection<String> props, String outCtx) throws RepositoryException {
1384a0d382fa first input
dwinter
parents:
diff changeset
278
1384a0d382fa first input
dwinter
parents:
diff changeset
279 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
1384a0d382fa first input
dwinter
parents:
diff changeset
280 for (String propString : props) {
1384a0d382fa first input
dwinter
parents:
diff changeset
281 //RDFProperty rdfProp = model.getRDFProperty(propString);
1384a0d382fa first input
dwinter
parents:
diff changeset
282 RDFSLiteral val = (RDFSLiteral) newValues.get(propString);
1384a0d382fa first input
dwinter
parents:
diff changeset
283 if (val==null)
1384a0d382fa first input
dwinter
parents:
diff changeset
284 continue;
1384a0d382fa first input
dwinter
parents:
diff changeset
285 String lang = val.getLanguage();
1384a0d382fa first input
dwinter
parents:
diff changeset
286 String str = val.getString();
1384a0d382fa first input
dwinter
parents:
diff changeset
287 str = str.replace("\"", "\\\"");
1384a0d382fa first input
dwinter
parents:
diff changeset
288 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
1384a0d382fa first input
dwinter
parents:
diff changeset
289 + ".";
1384a0d382fa first input
dwinter
parents:
diff changeset
290 }
1384a0d382fa first input
dwinter
parents:
diff changeset
291
1384a0d382fa first input
dwinter
parents:
diff changeset
292 queryString += " }";
1384a0d382fa first input
dwinter
parents:
diff changeset
293 TupleQueryResult result;
1384a0d382fa first input
dwinter
parents:
diff changeset
294 try {
1384a0d382fa first input
dwinter
parents:
diff changeset
295 result = th.querySPARQL(queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
296 } catch (MalformedQueryException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
297 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
298 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
299 } catch (QueryEvaluationException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
300 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
301 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
302 } catch (TripleStoreHandlerException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
303 e.printStackTrace();
1384a0d382fa first input
dwinter
parents:
diff changeset
304 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
305 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
306 }
1384a0d382fa first input
dwinter
parents:
diff changeset
307 try {
1384a0d382fa first input
dwinter
parents:
diff changeset
308 if (result.hasNext())
1384a0d382fa first input
dwinter
parents:
diff changeset
309 return true;
1384a0d382fa first input
dwinter
parents:
diff changeset
310 else
1384a0d382fa first input
dwinter
parents:
diff changeset
311 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
312 } catch (QueryEvaluationException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
313 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
314 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
315 }
1384a0d382fa first input
dwinter
parents:
diff changeset
316
1384a0d382fa first input
dwinter
parents:
diff changeset
317 }
1384a0d382fa first input
dwinter
parents:
diff changeset
318 private int getNewId(int startnumber, String identifier, String ctx)
1384a0d382fa first input
dwinter
parents:
diff changeset
319 throws SQLException {
1384a0d382fa first input
dwinter
parents:
diff changeset
320
1384a0d382fa first input
dwinter
parents:
diff changeset
321 java.sql.Statement smt = th.sqlCon.createStatement();
1384a0d382fa first input
dwinter
parents:
diff changeset
322 Boolean exists = true;
1384a0d382fa first input
dwinter
parents:
diff changeset
323
1384a0d382fa first input
dwinter
parents:
diff changeset
324 while (exists) {
1384a0d382fa first input
dwinter
parents:
diff changeset
325 String cmdString = String.format(
1384a0d382fa first input
dwinter
parents:
diff changeset
326 "sparql select count(*) from <%s> where {<%s> ?x ?y}",
1384a0d382fa first input
dwinter
parents:
diff changeset
327 ctx,
2
e3ecb88314a5 minor bugs
dwinter
parents: 1
diff changeset
328 TripleStoreHandler.ONTOLOGY_NS + identifier +":"
0
1384a0d382fa first input
dwinter
parents:
diff changeset
329 + String.valueOf(startnumber));
1384a0d382fa first input
dwinter
parents:
diff changeset
330 smt.execute(cmdString);
1384a0d382fa first input
dwinter
parents:
diff changeset
331 ResultSet rs = smt.getResultSet();
1384a0d382fa first input
dwinter
parents:
diff changeset
332 rs.next();
1384a0d382fa first input
dwinter
parents:
diff changeset
333 int count = rs.getInt(1);
1384a0d382fa first input
dwinter
parents:
diff changeset
334 if (count > 0) {
1384a0d382fa first input
dwinter
parents:
diff changeset
335 startnumber += 1;
1384a0d382fa first input
dwinter
parents:
diff changeset
336 } else {
1384a0d382fa first input
dwinter
parents:
diff changeset
337 exists = false;
1384a0d382fa first input
dwinter
parents:
diff changeset
338 }
1384a0d382fa first input
dwinter
parents:
diff changeset
339 }
1384a0d382fa first input
dwinter
parents:
diff changeset
340
1384a0d382fa first input
dwinter
parents:
diff changeset
341 return startnumber;
1384a0d382fa first input
dwinter
parents:
diff changeset
342 }
1384a0d382fa first input
dwinter
parents:
diff changeset
343
1384a0d382fa first input
dwinter
parents:
diff changeset
344 private Boolean checkExistance(OWLIndividual person,
1384a0d382fa first input
dwinter
parents:
diff changeset
345 TripleStoreHandler th2, Collection<String> props, String outCtx)
1384a0d382fa first input
dwinter
parents:
diff changeset
346 throws RepositoryException {
1384a0d382fa first input
dwinter
parents:
diff changeset
347 JenaOWLModel model = mh.getOwlModel();
1384a0d382fa first input
dwinter
parents:
diff changeset
348 //Map<String, String> vals = new HashMap<String, String>();
1384a0d382fa first input
dwinter
parents:
diff changeset
349
1384a0d382fa first input
dwinter
parents:
diff changeset
350 String queryString = "select ?x FROM <" + outCtx + "> " + "where {";
1384a0d382fa first input
dwinter
parents:
diff changeset
351 for (String propString : props) {
1384a0d382fa first input
dwinter
parents:
diff changeset
352 RDFProperty rdfProp = model.getRDFProperty(propString);
1384a0d382fa first input
dwinter
parents:
diff changeset
353 RDFSLiteral val = (RDFSLiteral) person.getPropertyValue(rdfProp);
1384a0d382fa first input
dwinter
parents:
diff changeset
354 if (val == null)
1384a0d382fa first input
dwinter
parents:
diff changeset
355 continue;
1384a0d382fa first input
dwinter
parents:
diff changeset
356 String lang = val.getLanguage();
1384a0d382fa first input
dwinter
parents:
diff changeset
357 String str = val.getString();
1384a0d382fa first input
dwinter
parents:
diff changeset
358 str = str.replace("\"", "\\\"");
1384a0d382fa first input
dwinter
parents:
diff changeset
359 queryString += "?x <" + propString + "> \"" + str + "\"@" + lang
1384a0d382fa first input
dwinter
parents:
diff changeset
360 + ".";
1384a0d382fa first input
dwinter
parents:
diff changeset
361 }
1384a0d382fa first input
dwinter
parents:
diff changeset
362
1384a0d382fa first input
dwinter
parents:
diff changeset
363 queryString += " }";
1384a0d382fa first input
dwinter
parents:
diff changeset
364 TupleQueryResult result;
1384a0d382fa first input
dwinter
parents:
diff changeset
365 try {
1384a0d382fa first input
dwinter
parents:
diff changeset
366 result = th.querySPARQL(queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
367 } catch (MalformedQueryException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
368 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
369 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
370 } catch (QueryEvaluationException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
371 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
372 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
373 } catch (TripleStoreHandlerException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
374 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
375 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
376 }
1384a0d382fa first input
dwinter
parents:
diff changeset
377 try {
1384a0d382fa first input
dwinter
parents:
diff changeset
378 if (result.hasNext())
1384a0d382fa first input
dwinter
parents:
diff changeset
379 return true;
1384a0d382fa first input
dwinter
parents:
diff changeset
380 else
1384a0d382fa first input
dwinter
parents:
diff changeset
381 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
382 } catch (QueryEvaluationException e) {
1384a0d382fa first input
dwinter
parents:
diff changeset
383 logger.error("Query String cannot be handled:" + queryString);
1384a0d382fa first input
dwinter
parents:
diff changeset
384 return false;
1384a0d382fa first input
dwinter
parents:
diff changeset
385 }
1384a0d382fa first input
dwinter
parents:
diff changeset
386
1384a0d382fa first input
dwinter
parents:
diff changeset
387 }
1384a0d382fa first input
dwinter
parents:
diff changeset
388
1384a0d382fa first input
dwinter
parents:
diff changeset
389 private OWLIndividual createDbPediaProvider(String ctx)
1384a0d382fa first input
dwinter
parents:
diff changeset
390 throws RepositoryException, TripleStoreHandlerException {
1384a0d382fa first input
dwinter
parents:
diff changeset
391 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
1384a0d382fa first input
dwinter
parents:
diff changeset
392
1384a0d382fa first input
dwinter
parents:
diff changeset
393 OWLIndividual dbcreation = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
394 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
1384a0d382fa first input
dwinter
parents:
diff changeset
395 TripleStoreHandler.ONTOLOGY_NS + "provider_wikipedia");
1384a0d382fa first input
dwinter
parents:
diff changeset
396
1384a0d382fa first input
dwinter
parents:
diff changeset
397 th.write(dbcreation, ctx);
1384a0d382fa first input
dwinter
parents:
diff changeset
398
1384a0d382fa first input
dwinter
parents:
diff changeset
399 return dbcreation;
1384a0d382fa first input
dwinter
parents:
diff changeset
400
1384a0d382fa first input
dwinter
parents:
diff changeset
401 }
1384a0d382fa first input
dwinter
parents:
diff changeset
402
1384a0d382fa first input
dwinter
parents:
diff changeset
403 private OWLIndividual createMPIWFProvider(String ctx)
1384a0d382fa first input
dwinter
parents:
diff changeset
404 throws RepositoryException, TripleStoreHandlerException {
1384a0d382fa first input
dwinter
parents:
diff changeset
405 HashMap<String, Object> db_creationValues = new HashMap<String, Object>();
1384a0d382fa first input
dwinter
parents:
diff changeset
406
1384a0d382fa first input
dwinter
parents:
diff changeset
407 OWLIndividual dbcreation = mh.generateEntity(
1384a0d382fa first input
dwinter
parents:
diff changeset
408 TripleStoreHandler.ONTOLOGY_NS + "NamedEntityProvider",
1384a0d382fa first input
dwinter
parents:
diff changeset
409 TripleStoreHandler.ONTOLOGY_NS + "provider_MPIWG");
1384a0d382fa first input
dwinter
parents:
diff changeset
410
1384a0d382fa first input
dwinter
parents:
diff changeset
411 th.write(dbcreation, ctx);
1384a0d382fa first input
dwinter
parents:
diff changeset
412
1384a0d382fa first input
dwinter
parents:
diff changeset
413 return dbcreation;
1384a0d382fa first input
dwinter
parents:
diff changeset
414
1384a0d382fa first input
dwinter
parents:
diff changeset
415 }
1384a0d382fa first input
dwinter
parents:
diff changeset
416
1384a0d382fa first input
dwinter
parents:
diff changeset
417 public static void main(String args[]) throws Exception {
1
b8333fab0d95 minor bugs
dwinter
parents: 0
diff changeset
418 if (args.length < 4) {
b8333fab0d95 minor bugs
dwinter
parents: 0
diff changeset
419 System.out.println("usage: import user pw offset limit ");
0
1384a0d382fa first input
dwinter
parents:
diff changeset
420 System.exit(1);
1384a0d382fa first input
dwinter
parents:
diff changeset
421 }
1384a0d382fa first input
dwinter
parents:
diff changeset
422
1384a0d382fa first input
dwinter
parents:
diff changeset
423 Logger.getRootLogger().setLevel(Level.INFO);
1384a0d382fa first input
dwinter
parents:
diff changeset
424 BasicConfigurator.configure();
1384a0d382fa first input
dwinter
parents:
diff changeset
425
1384a0d382fa first input
dwinter
parents:
diff changeset
426 MetaDataHandler mh = new MetaDataHandler();
1384a0d382fa first input
dwinter
parents:
diff changeset
427
1384a0d382fa first input
dwinter
parents:
diff changeset
428 TripleStoreHandler th = new TripleStoreHandler(
1
b8333fab0d95 minor bugs
dwinter
parents: 0
diff changeset
429 "jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111",args[0], args[1]);
0
1384a0d382fa first input
dwinter
parents:
diff changeset
430 System.out.println(mh);
1384a0d382fa first input
dwinter
parents:
diff changeset
431 System.out.println(th);
1384a0d382fa first input
dwinter
parents:
diff changeset
432
1
b8333fab0d95 minor bugs
dwinter
parents: 0
diff changeset
433 Import imp = new Import(mh, th, args[2], args[3]);
0
1384a0d382fa first input
dwinter
parents:
diff changeset
434
1384a0d382fa first input
dwinter
parents:
diff changeset
435 HashMap<String, String> mapping = new HashMap<String, String>();
1384a0d382fa first input
dwinter
parents:
diff changeset
436 mapping.put("http://xmlns.com/foaf/0.1/surname",
1384a0d382fa first input
dwinter
parents:
diff changeset
437 "http://xmlns.com/foaf/0.1/lastName");
1384a0d382fa first input
dwinter
parents:
diff changeset
438 mapping.put("http://xmlns.com/foaf/0.1/givenName",
1384a0d382fa first input
dwinter
parents:
diff changeset
439 "http://xmlns.com/foaf/0.1/firstName");
1384a0d382fa first input
dwinter
parents:
diff changeset
440 mapping.put("http://xmlns.com/foaf/0.1/name",
1384a0d382fa first input
dwinter
parents:
diff changeset
441 "http://xmlns.com/foaf/0.1/name");
1384a0d382fa first input
dwinter
parents:
diff changeset
442
1384a0d382fa first input
dwinter
parents:
diff changeset
443 imp.createMPIWGIdentifiers("http://dbpedia.org/ontology/Person",
1384a0d382fa first input
dwinter
parents:
diff changeset
444 mapping, "file://personendataWikipedia",
2
e3ecb88314a5 minor bugs
dwinter
parents: 1
diff changeset
445 "file://mpiwg_persons_2.rdf");
0
1384a0d382fa first input
dwinter
parents:
diff changeset
446
1384a0d382fa first input
dwinter
parents:
diff changeset
447 // mh.getOwlModel().save(new java.net.URI("file:///tmp/prot.owl"));
1384a0d382fa first input
dwinter
parents:
diff changeset
448 }
1384a0d382fa first input
dwinter
parents:
diff changeset
449
1384a0d382fa first input
dwinter
parents:
diff changeset
450 }