Mercurial > hg > mpdl-group
comparison software/eXist/webapp/mpdl/lucene/search.xql @ 11:d6f528ad5d96
TEI Unterstützung, Fehlerbehebungen, externe Objekte
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Fri, 11 Mar 2011 13:34:02 +0100 |
parents | 5589d865af7a |
children | e99964f390e4 |
comparison
equal
deleted
inserted
replaced
10:59ff47d1e237 | 11:d6f528ad5d96 |
---|---|
5 import module namespace functx = "http://www.functx.com" at "../util/functx.xql"; | 5 import module namespace functx = "http://www.functx.com" at "../util/functx.xql"; |
6 | 6 |
7 declare namespace ft = "http://exist-db.org/xquery/lucene"; | 7 declare namespace ft = "http://exist-db.org/xquery/lucene"; |
8 | 8 |
9 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/"; | 9 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/"; |
10 declare namespace TEI="http://www.tei-c.org/ns/1.0"; | |
10 | 11 |
11 declare namespace dc="http://purl.org/dc/elements/1.1/"; | 12 declare namespace dc="http://purl.org/dc/elements/1.1/"; |
12 declare namespace dcterms="http://purl.org/dc/terms"; | 13 declare namespace dcterms="http://purl.org/dc/terms"; |
13 | 14 |
14 declare function mpdl-lucene:search($mpdlCollection, $queryStr) { | 15 declare function mpdl-lucene:search($mpdlCollection, $queryStr) { |
15 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr) | 16 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr) |
16 let $result := | 17 let $result := |
17 if ($luceneParseResult = '') | 18 if ($luceneParseResult = '') |
18 then $mpdlCollection/.[ft:query(archimedes/text, $queryStr) or ft:query(echo:echo/echo:text, $queryStr)] | 19 then $mpdlCollection/.[ft:query(archimedes/text, $queryStr) or ft:query(echo:echo/echo:text, $queryStr) or ft:query(TEI:TEI/TEI:text, $queryStr)] |
19 else | 20 else |
20 <result> | 21 <result> |
21 <error>{$luceneParseResult}</error> | 22 <error>{$luceneParseResult}</error> |
22 <size>0</size> | 23 <size>0</size> |
23 <pages>0</pages> | 24 <pages>0</pages> |
39 let $pageBreaks := | 40 let $pageBreaks := |
40 if ($mpdlCollectionName = 'archimedes') | 41 if ($mpdlCollectionName = 'archimedes') |
41 then $document//pb | 42 then $document//pb |
42 else if ($mpdlCollectionName = 'echo') | 43 else if ($mpdlCollectionName = 'echo') |
43 then $document//echo:pb | 44 then $document//echo:pb |
45 else if ($mpdlCollectionName = 'tei') | |
46 then $document//TEI:pb | |
44 else $document//pb | 47 else $document//pb |
45 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr) | 48 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr) |
46 let $t := | 49 let $t := |
47 if ($luceneParseResult != '') | 50 if ($luceneParseResult != '') |
48 then () | 51 then () |
49 else if ($mpdlCollectionName = 'archimedes') | 52 else if ($mpdlCollectionName = 'archimedes') |
50 then $document//s[ft:query(., $query)] | 53 then $document//s[ft:query(., $query)] |
51 else if ($mpdlCollectionName = 'echo') | 54 else if ($mpdlCollectionName = 'echo') |
52 then $document//echo:s[ft:query(., $query)] | 55 then $document//echo:s[ft:query(., $query)] |
56 else if ($mpdlCollectionName = 'tei') | |
57 then $document//TEI:s[ft:query(., $query)] | |
53 else $document//s[ft:query(., $query)] | 58 else $document//s[ft:query(., $query)] |
54 let $from := ($pn * $pageSize) - $pageSize + 1 | 59 let $from := ($pn * $pageSize) - $pageSize + 1 |
55 let $to := $pn * $pageSize | 60 let $to := $pn * $pageSize |
56 (: performance improvements: result set of 500 needs 3 sec., result set of 10 needs 0,7 sec.:) | 61 (: performance improvements: result set of 500 needs 3 sec., result set of 10 needs 0,7 sec.:) |
57 let $tempQueryResult := | 62 let $tempQueryResult := |
71 let $posOfS := (: faster: comparison only in s elements of this document :) | 76 let $posOfS := (: faster: comparison only in s elements of this document :) |
72 if ($mpdlCollectionName = 'archimedes') | 77 if ($mpdlCollectionName = 'archimedes') |
73 then count($pb/following::s[. << $s]) + 1 | 78 then count($pb/following::s[. << $s]) + 1 |
74 else if ($mpdlCollectionName = 'echo') | 79 else if ($mpdlCollectionName = 'echo') |
75 then count($pb/following::echo:s[. << $s]) + 1 | 80 then count($pb/following::echo:s[. << $s]) + 1 |
81 else if ($mpdlCollectionName = 'tei') | |
82 then count($pb/following::TEI:s[. << $s]) + 1 | |
76 else count($pb/following::s[. << $s]) + 1 | 83 else count($pb/following::s[. << $s]) + 1 |
77 let $position := $from - 1 + $pos | 84 let $position := $from - 1 + $pos |
78 let $resultElem := | 85 let $resultElem := |
79 <hit> | 86 <hit> |
80 <pos>{$position}</pos> | 87 <pos>{$position}</pos> |
201 let $result := | 208 let $result := |
202 if ($docBase = 'archimedes') | 209 if ($docBase = 'archimedes') |
203 then $doc/archimedes/info | 210 then $doc/archimedes/info |
204 else if ($docBase = 'echo') | 211 else if ($docBase = 'echo') |
205 then $doc/echo:echo/echo:metadata | 212 then $doc/echo:echo/echo:metadata |
213 else if ($docBase = 'tei') | |
214 then $doc/TEI:TEI/TEI:teiHeader | |
206 else () | 215 else () |
207 return $result | 216 return $result |
208 }; | 217 }; |
209 | 218 |
210 declare function mpdl-lucene:getText($docBase, $doc) { | 219 declare function mpdl-lucene:getText($docBase, $doc) { |
211 let $result := | 220 let $result := |
212 if ($docBase = 'archimedes') | 221 if ($docBase = 'archimedes') |
213 then $doc/archimedes/text | 222 then $doc/archimedes/text |
214 else if ($docBase = 'echo') | 223 else if ($docBase = 'echo') |
215 then $doc/echo:echo/echo:text | 224 then $doc/echo:echo/echo:text |
225 else if ($docBase = 'tei') | |
226 then $doc/TEI:TEI/TEI:text | |
216 else () | 227 else () |
217 return $result | 228 return $result |
218 }; | 229 }; |
219 | 230 |
220 | 231 |
229 }; | 240 }; |
230 | 241 |
231 declare function mpdl-lucene:getAttrQueryStr($attribute, $attrValue) { | 242 declare function mpdl-lucene:getAttrQueryStr($attribute, $attrValue) { |
232 let $attrArch := mpdl-lucene:getElemNameByAttr("archimedes", $attribute) | 243 let $attrArch := mpdl-lucene:getElemNameByAttr("archimedes", $attribute) |
233 let $attrEcho := mpdl-lucene:getElemNameByAttr("echo", $attribute) | 244 let $attrEcho := mpdl-lucene:getElemNameByAttr("echo", $attribute) |
245 let $attrTei := mpdl-lucene:getElemNameByAttr("tei", $attribute) | |
234 let $attrArchRelQueryStr := | 246 let $attrArchRelQueryStr := |
235 if ($attribute = "date") | 247 if ($attribute = "date") |
236 then concat($attrArch, " = '", $attrValue, "'") | 248 then concat($attrArch, " = '", $attrValue, "'") |
237 else concat("ft:query(", $attrArch, ", '", $attrValue, "')") | 249 else concat("ft:query(", $attrArch, ", '", $attrValue, "')") |
238 let $attrEchoRelQueryStr := | 250 let $attrEchoRelQueryStr := |
239 if ($attribute = "date") | 251 if ($attribute = "date") |
240 then concat($attrEcho, " = '", $attrValue, "'") | 252 then concat($attrEcho, " = '", $attrValue, "'") |
241 else concat("ft:query(", $attrEcho, ", '", $attrValue, "')") | 253 else concat("ft:query(", $attrEcho, ", '", $attrValue, "')") |
254 let $attrTeiRelQueryStr := | |
255 if ($attribute = "date") | |
256 then concat($attrTei, " = '", $attrValue, "'") | |
257 else concat("ft:query(", $attrTei, ", '", $attrValue, "')") | |
242 let $result := | 258 let $result := |
243 if ($attrArch != "" and $attrEcho != "") | 259 if ($attrArch != "" and $attrEcho != "" and $attrTei != "") |
260 then concat($attrArchRelQueryStr, " or ", $attrEchoRelQueryStr, " or ", $attrTeiRelQueryStr) | |
261 else if ($attrArch != "" and $attrEcho != "" and $attrTei = "") | |
244 then concat($attrArchRelQueryStr, " or ", $attrEchoRelQueryStr) | 262 then concat($attrArchRelQueryStr, " or ", $attrEchoRelQueryStr) |
245 else if ($attrArch = "" and $attrEcho != "") | 263 else if ($attrArch != "" and $attrEcho = "" and $attrTei != "") |
264 then concat($attrArchRelQueryStr, " or ", $attrTeiRelQueryStr) | |
265 else if ($attrArch = "" and $attrEcho != "" and $attrTei != "") | |
266 then concat($attrEchoRelQueryStr, " or ", $attrTeiRelQueryStr) | |
267 else if ($attrArch != "" and $attrEcho = "" and $attrTei = "") | |
268 then $attrArchRelQueryStr | |
269 else if ($attrArch = "" and $attrEcho != "" and $attrTei = "") | |
246 then $attrEchoRelQueryStr | 270 then $attrEchoRelQueryStr |
247 else if ($attrArch != "" and $attrEcho = "") | 271 else if ($attrArch = "" and $attrEcho = "" and $attrTei != "") |
248 then $attrArchRelQueryStr | 272 then $attrTeiRelQueryStr |
249 else "" | 273 else "" |
250 return $result | 274 return $result |
251 }; | 275 }; |
252 | 276 |
253 declare function mpdl-lucene:getElemNameByAttr($docBase, $attribute) { | 277 declare function mpdl-lucene:getElemNameByAttr($docBase, $attribute) { |
254 let $docBaseArch := "archimedes" | 278 let $docBaseArch := "archimedes" |
255 let $docBaseEcho := "echo" | 279 let $docBaseEcho := "echo" |
280 let $docBaseTei := "tei" | |
256 let $result := | 281 let $result := |
257 if ($docBase = $docBaseArch and $attribute = "author") | 282 if ($docBase = $docBaseArch and $attribute = "author") |
258 then "author" | 283 then "author" |
259 else if ($docBase = $docBaseEcho and $attribute = "author") | 284 else if ($docBase = $docBaseEcho and $attribute = "author") |
260 then "dcterms:creator" | 285 then "dcterms:creator" |
286 else if ($docBase = $docBaseTei and $attribute = "author") | |
287 then "TEI:fileDesc/TEI:titleStmt/TEI:author" | |
261 else if ($docBase = $docBaseArch and $attribute = "title") | 288 else if ($docBase = $docBaseArch and $attribute = "title") |
262 then "title" | 289 then "title" |
263 else if ($docBase = $docBaseEcho and $attribute = "title") | 290 else if ($docBase = $docBaseEcho and $attribute = "title") |
264 then "dcterms:title" | 291 then "dcterms:title" |
292 else if ($docBase = $docBaseTei and $attribute = "title") | |
293 then "TEI:fileDesc/TEI:titleStmt/TEI:title" | |
265 else if ($docBase = $docBaseArch and $attribute = "place") | 294 else if ($docBase = $docBaseArch and $attribute = "place") |
266 then "place" | 295 then "place" |
267 else if ($docBase = $docBaseEcho and $attribute = "place") | 296 else if ($docBase = $docBaseEcho and $attribute = "place") |
268 then "" | 297 then "" |
298 else if ($docBase = $docBaseTei and $attribute = "place") | |
299 then "TEI:fileDesc/TEI:publicationStmt/TEI:pubPlace" | |
269 else if ($docBase = $docBaseArch and $attribute = "date") | 300 else if ($docBase = $docBaseArch and $attribute = "date") |
270 then "date" | 301 then "date" |
271 else if ($docBase = $docBaseEcho and $attribute = "date") | 302 else if ($docBase = $docBaseEcho and $attribute = "date") |
272 then "dcterms:date" | 303 then "dcterms:date" |
304 else if ($docBase = $docBaseTei and $attribute = "date") | |
305 then "TEI:fileDesc/TEI:publicationStmt/TEI:date" | |
273 else if ($docBase = $docBaseArch and $attribute = "language") | 306 else if ($docBase = $docBaseArch and $attribute = "language") |
274 then "lang" | 307 then "lang" |
275 else if ($docBase = $docBaseEcho and $attribute = "language") | 308 else if ($docBase = $docBaseEcho and $attribute = "language") |
276 then "dcterms:language" | 309 then "dcterms:language" |
310 else if ($docBase = $docBaseTei and $attribute = "language") | |
311 then "TEI:profileDesc/TEI:langUsage/TEI:language/@ident" | |
277 else if ($docBase = $docBaseArch and $attribute = "identifier") | 312 else if ($docBase = $docBaseArch and $attribute = "identifier") |
278 then "locator" | 313 then "locator" |
279 else if ($docBase = $docBaseEcho and $attribute = "identifier") | 314 else if ($docBase = $docBaseEcho and $attribute = "identifier") |
280 then "dcterms:identifier" | 315 then "dcterms:identifier" |
316 else if ($docBase = $docBaseTei and $attribute = "identifier") | |
317 then "identifier" | |
281 else if ($docBase = $docBaseArch and $attribute = "rights") | 318 else if ($docBase = $docBaseArch and $attribute = "rights") |
282 then "" | 319 then "" |
283 else if ($docBase = $docBaseEcho and $attribute = "rights") | 320 else if ($docBase = $docBaseEcho and $attribute = "rights") |
284 then "dcterms:rights" | 321 then "dcterms:rights" |
322 else if ($docBase = $docBaseTei and $attribute = "rights") | |
323 then "rights" | |
285 else if ($docBase = $docBaseArch and $attribute = "license") | 324 else if ($docBase = $docBaseArch and $attribute = "license") |
286 then "" | 325 then "" |
287 else if ($docBase = $docBaseEcho and $attribute = "license") | 326 else if ($docBase = $docBaseEcho and $attribute = "license") |
288 then "dcterms:license" | 327 then "dcterms:license" |
328 else if ($docBase = $docBaseTei and $attribute = "license") | |
329 then "" | |
289 else if ($docBase = $docBaseArch and $attribute = "accessRights") | 330 else if ($docBase = $docBaseArch and $attribute = "accessRights") |
290 then "" | 331 then "" |
291 else if ($docBase = $docBaseEcho and $attribute = "accessRights") | 332 else if ($docBase = $docBaseEcho and $attribute = "accessRights") |
292 then "dcterms:accessRights" | 333 then "dcterms:accessRights" |
334 else if ($docBase = $docBaseTei and $attribute = "accessRights") | |
335 then "accessRights" | |
293 else if ($docBase = $docBaseArch and $attribute = "file") | 336 else if ($docBase = $docBaseArch and $attribute = "file") |
294 then "cvs_file" | 337 then "cvs_file" |
295 else if ($docBase = $docBaseEcho and $attribute = "file") | 338 else if ($docBase = $docBaseEcho and $attribute = "file") |
296 then "" | 339 then "" |
340 else if ($docBase = $docBaseTei and $attribute = "file") | |
341 then "" | |
297 else if ($docBase = $docBaseArch and $attribute = "translator") | 342 else if ($docBase = $docBaseArch and $attribute = "translator") |
298 then "translator" | 343 then "translator" |
299 else if ($docBase = $docBaseEcho and $attribute = "translator") | 344 else if ($docBase = $docBaseEcho and $attribute = "translator") |
300 then "" | 345 then "" |
346 else if ($docBase = $docBaseTei and $attribute = "translator") | |
347 then "" | |
301 else if ($docBase = $docBaseArch and $attribute = "version") | 348 else if ($docBase = $docBaseArch and $attribute = "version") |
302 then "cvs_version" | 349 then "cvs_version" |
303 else if ($docBase = $docBaseEcho and $attribute = "version") | 350 else if ($docBase = $docBaseEcho and $attribute = "version") |
304 then "" | 351 then "" |
352 else if ($docBase = $docBaseTei and $attribute = "version") | |
353 then "" | |
305 else "" | 354 else "" |
306 | 355 |
307 return $result | 356 return $result |
308 }; | 357 }; |
309 | 358 |
310 declare function mpdl-lucene:getElementsByAttr($metadata, $docBase, $attribute) { | 359 declare function mpdl-lucene:getElementsByAttr($metadata, $docBase, $attribute) { |
311 let $docBaseArch := "archimedes" | 360 let $docBaseArch := "archimedes" |
312 let $docBaseEcho := "echo" | 361 let $docBaseEcho := "echo" |
362 let $docBaseTei := "tei" | |
313 let $result := | 363 let $result := |
314 if ($docBase = $docBaseArch and $attribute = "author") | 364 if ($docBase = $docBaseArch and $attribute = "author") |
315 then | 365 then |
316 for $elem in $metadata/author | 366 for $elem in $metadata/author |
317 return <author>{$elem}</author> | 367 return <author>{$elem}</author> |
318 else if ($docBase = $docBaseEcho and $attribute = "author") | 368 else if ($docBase = $docBaseEcho and $attribute = "author") |
319 then | 369 then |
320 for $elem in $metadata/dcterms:creator | 370 for $elem in $metadata/dcterms:creator |
321 return <author>{$elem}</author> | 371 return <author>{$elem}</author> |
372 else if ($docBase = $docBaseTei and $attribute = "author") | |
373 then | |
374 for $elem in $metadata/TEI:fileDesc/TEI:titleStmt/TEI:author | |
375 return <author>{$elem}</author> | |
322 else if ($docBase = $docBaseArch and $attribute = "title") | 376 else if ($docBase = $docBaseArch and $attribute = "title") |
323 then | 377 then |
324 for $elem in $metadata/title | 378 for $elem in $metadata/title |
325 return <title>{$elem}</title> | 379 return <title>{$elem}</title> |
326 else if ($docBase = $docBaseEcho and $attribute = "title") | 380 else if ($docBase = $docBaseEcho and $attribute = "title") |
327 then | 381 then |
328 for $elem in $metadata/dcterms:title | 382 for $elem in $metadata/dcterms:title |
329 return <title>{$elem}</title> | 383 return <title>{$elem}</title> |
384 else if ($docBase = $docBaseTei and $attribute = "title") | |
385 then | |
386 for $elem in $metadata/TEI:fileDesc/TEI:titleStmt/TEI:title | |
387 return <title>{$elem}</title> | |
330 else if ($docBase = $docBaseArch and $attribute = "place") | 388 else if ($docBase = $docBaseArch and $attribute = "place") |
331 then | 389 then |
332 for $elem in $metadata/place | 390 for $elem in $metadata/place |
333 return <place>{$elem}</place> | 391 return <place>{$elem}</place> |
334 else if ($docBase = $docBaseEcho and $attribute = "place") | 392 else if ($docBase = $docBaseEcho and $attribute = "place") |
338 for $elem in $metadata/date | 396 for $elem in $metadata/date |
339 return <date>{$elem}</date> | 397 return <date>{$elem}</date> |
340 else if ($docBase = $docBaseEcho and $attribute = "date") | 398 else if ($docBase = $docBaseEcho and $attribute = "date") |
341 then | 399 then |
342 for $elem in $metadata/dcterms:date | 400 for $elem in $metadata/dcterms:date |
401 return <date>{$elem}</date> | |
402 else if ($docBase = $docBaseTei and $attribute = "date") | |
403 then | |
404 for $elem in $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:date | |
343 return <date>{$elem}</date> | 405 return <date>{$elem}</date> |
344 else if ($docBase = $docBaseArch and $attribute = "language") | 406 else if ($docBase = $docBaseArch and $attribute = "language") |
345 then $metadata/lang | 407 then $metadata/lang |
346 else if ($docBase = $docBaseEcho and $attribute = "language") | 408 else if ($docBase = $docBaseEcho and $attribute = "language") |
347 then $metadata/dcterms:language | 409 then $metadata/dcterms:language |
410 else if ($docBase = $docBaseTei and $attribute = "language") | |
411 then $metadata/TEI:profileDesc/TEI:langUsage/TEI:language/@ident | |
348 else if ($docBase = $docBaseArch and $attribute = "identifier") | 412 else if ($docBase = $docBaseArch and $attribute = "identifier") |
349 then $metadata/locator | 413 then $metadata/locator |
350 else if ($docBase = $docBaseEcho and $attribute = "identifier") | 414 else if ($docBase = $docBaseEcho and $attribute = "identifier") |
351 then $metadata/dcterms:identifier | 415 then $metadata/dcterms:identifier |
352 else if ($docBase = $docBaseArch and $attribute = "rights") | 416 else if ($docBase = $docBaseArch and $attribute = "rights") |
353 then () | 417 then () |
354 else if ($docBase = $docBaseEcho and $attribute = "rights") | 418 else if ($docBase = $docBaseEcho and $attribute = "rights") |
355 then | 419 then |
356 for $elem in $metadata/dcterms:rights | 420 for $elem in $metadata/dcterms:rights |
357 return <rights>{$elem}</rights> | 421 return <rights>{$elem}</rights> |
422 else if ($docBase = $docBaseTei and $attribute = "rights") | |
423 then $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:availability | |
358 else if ($docBase = $docBaseArch and $attribute = "accessRights") | 424 else if ($docBase = $docBaseArch and $attribute = "accessRights") |
359 then () | 425 then () |
360 else if ($docBase = $docBaseEcho and $attribute = "accessRights") | 426 else if ($docBase = $docBaseEcho and $attribute = "accessRights") |
361 then | 427 then |
362 for $elem in $metadata/dcterms:accessRights | 428 for $elem in $metadata/dcterms:accessRights |
363 return <rights>{$elem}</rights> | 429 return <rights>{$elem}</rights> |
430 else if ($docBase = $docBaseTei and $attribute = "accessRights") | |
431 then $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:availability/@status | |
364 else if ($docBase = $docBaseArch and $attribute = "license") | 432 else if ($docBase = $docBaseArch and $attribute = "license") |
365 then () | 433 then () |
366 else if ($docBase = $docBaseEcho and $attribute = "license") | 434 else if ($docBase = $docBaseEcho and $attribute = "license") |
367 then | 435 then |
368 for $elem in $metadata/dcterms:license | 436 for $elem in $metadata/dcterms:license |