comparison software/eXist/webapp/mpdl/lucene/search.xql @ 11:d6f528ad5d96

TEI Unterstützung, Fehlerbehebungen, externe Objekte
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Fri, 11 Mar 2011 13:34:02 +0100
parents 5589d865af7a
children e99964f390e4
comparison
equal deleted inserted replaced
10:59ff47d1e237 11:d6f528ad5d96
5 import module namespace functx = "http://www.functx.com" at "../util/functx.xql"; 5 import module namespace functx = "http://www.functx.com" at "../util/functx.xql";
6 6
7 declare namespace ft = "http://exist-db.org/xquery/lucene"; 7 declare namespace ft = "http://exist-db.org/xquery/lucene";
8 8
9 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/"; 9 declare namespace echo="http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
10 declare namespace TEI="http://www.tei-c.org/ns/1.0";
10 11
11 declare namespace dc="http://purl.org/dc/elements/1.1/"; 12 declare namespace dc="http://purl.org/dc/elements/1.1/";
12 declare namespace dcterms="http://purl.org/dc/terms"; 13 declare namespace dcterms="http://purl.org/dc/terms";
13 14
14 declare function mpdl-lucene:search($mpdlCollection, $queryStr) { 15 declare function mpdl-lucene:search($mpdlCollection, $queryStr) {
15 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr) 16 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr)
16 let $result := 17 let $result :=
17 if ($luceneParseResult = '') 18 if ($luceneParseResult = '')
18 then $mpdlCollection/.[ft:query(archimedes/text, $queryStr) or ft:query(echo:echo/echo:text, $queryStr)] 19 then $mpdlCollection/.[ft:query(archimedes/text, $queryStr) or ft:query(echo:echo/echo:text, $queryStr) or ft:query(TEI:TEI/TEI:text, $queryStr)]
19 else 20 else
20 <result> 21 <result>
21 <error>{$luceneParseResult}</error> 22 <error>{$luceneParseResult}</error>
22 <size>0</size> 23 <size>0</size>
23 <pages>0</pages> 24 <pages>0</pages>
39 let $pageBreaks := 40 let $pageBreaks :=
40 if ($mpdlCollectionName = 'archimedes') 41 if ($mpdlCollectionName = 'archimedes')
41 then $document//pb 42 then $document//pb
42 else if ($mpdlCollectionName = 'echo') 43 else if ($mpdlCollectionName = 'echo')
43 then $document//echo:pb 44 then $document//echo:pb
45 else if ($mpdlCollectionName = 'tei')
46 then $document//TEI:pb
44 else $document//pb 47 else $document//pb
45 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr) 48 let $luceneParseResult := mpdltext:lucene-parse-query($queryStr)
46 let $t := 49 let $t :=
47 if ($luceneParseResult != '') 50 if ($luceneParseResult != '')
48 then () 51 then ()
49 else if ($mpdlCollectionName = 'archimedes') 52 else if ($mpdlCollectionName = 'archimedes')
50 then $document//s[ft:query(., $query)] 53 then $document//s[ft:query(., $query)]
51 else if ($mpdlCollectionName = 'echo') 54 else if ($mpdlCollectionName = 'echo')
52 then $document//echo:s[ft:query(., $query)] 55 then $document//echo:s[ft:query(., $query)]
56 else if ($mpdlCollectionName = 'tei')
57 then $document//TEI:s[ft:query(., $query)]
53 else $document//s[ft:query(., $query)] 58 else $document//s[ft:query(., $query)]
54 let $from := ($pn * $pageSize) - $pageSize + 1 59 let $from := ($pn * $pageSize) - $pageSize + 1
55 let $to := $pn * $pageSize 60 let $to := $pn * $pageSize
56 (: performance improvements: result set of 500 needs 3 sec., result set of 10 needs 0,7 sec.:) 61 (: performance improvements: result set of 500 needs 3 sec., result set of 10 needs 0,7 sec.:)
57 let $tempQueryResult := 62 let $tempQueryResult :=
71 let $posOfS := (: faster: comparison only in s elements of this document :) 76 let $posOfS := (: faster: comparison only in s elements of this document :)
72 if ($mpdlCollectionName = 'archimedes') 77 if ($mpdlCollectionName = 'archimedes')
73 then count($pb/following::s[. << $s]) + 1 78 then count($pb/following::s[. << $s]) + 1
74 else if ($mpdlCollectionName = 'echo') 79 else if ($mpdlCollectionName = 'echo')
75 then count($pb/following::echo:s[. << $s]) + 1 80 then count($pb/following::echo:s[. << $s]) + 1
81 else if ($mpdlCollectionName = 'tei')
82 then count($pb/following::TEI:s[. << $s]) + 1
76 else count($pb/following::s[. << $s]) + 1 83 else count($pb/following::s[. << $s]) + 1
77 let $position := $from - 1 + $pos 84 let $position := $from - 1 + $pos
78 let $resultElem := 85 let $resultElem :=
79 <hit> 86 <hit>
80 <pos>{$position}</pos> 87 <pos>{$position}</pos>
201 let $result := 208 let $result :=
202 if ($docBase = 'archimedes') 209 if ($docBase = 'archimedes')
203 then $doc/archimedes/info 210 then $doc/archimedes/info
204 else if ($docBase = 'echo') 211 else if ($docBase = 'echo')
205 then $doc/echo:echo/echo:metadata 212 then $doc/echo:echo/echo:metadata
213 else if ($docBase = 'tei')
214 then $doc/TEI:TEI/TEI:teiHeader
206 else () 215 else ()
207 return $result 216 return $result
208 }; 217 };
209 218
210 declare function mpdl-lucene:getText($docBase, $doc) { 219 declare function mpdl-lucene:getText($docBase, $doc) {
211 let $result := 220 let $result :=
212 if ($docBase = 'archimedes') 221 if ($docBase = 'archimedes')
213 then $doc/archimedes/text 222 then $doc/archimedes/text
214 else if ($docBase = 'echo') 223 else if ($docBase = 'echo')
215 then $doc/echo:echo/echo:text 224 then $doc/echo:echo/echo:text
225 else if ($docBase = 'tei')
226 then $doc/TEI:TEI/TEI:text
216 else () 227 else ()
217 return $result 228 return $result
218 }; 229 };
219 230
220 231
229 }; 240 };
230 241
231 declare function mpdl-lucene:getAttrQueryStr($attribute, $attrValue) { 242 declare function mpdl-lucene:getAttrQueryStr($attribute, $attrValue) {
232 let $attrArch := mpdl-lucene:getElemNameByAttr("archimedes", $attribute) 243 let $attrArch := mpdl-lucene:getElemNameByAttr("archimedes", $attribute)
233 let $attrEcho := mpdl-lucene:getElemNameByAttr("echo", $attribute) 244 let $attrEcho := mpdl-lucene:getElemNameByAttr("echo", $attribute)
245 let $attrTei := mpdl-lucene:getElemNameByAttr("tei", $attribute)
234 let $attrArchRelQueryStr := 246 let $attrArchRelQueryStr :=
235 if ($attribute = "date") 247 if ($attribute = "date")
236 then concat($attrArch, " = '", $attrValue, "'") 248 then concat($attrArch, " = '", $attrValue, "'")
237 else concat("ft:query(", $attrArch, ", '", $attrValue, "')") 249 else concat("ft:query(", $attrArch, ", '", $attrValue, "')")
238 let $attrEchoRelQueryStr := 250 let $attrEchoRelQueryStr :=
239 if ($attribute = "date") 251 if ($attribute = "date")
240 then concat($attrEcho, " = '", $attrValue, "'") 252 then concat($attrEcho, " = '", $attrValue, "'")
241 else concat("ft:query(", $attrEcho, ", '", $attrValue, "')") 253 else concat("ft:query(", $attrEcho, ", '", $attrValue, "')")
254 let $attrTeiRelQueryStr :=
255 if ($attribute = "date")
256 then concat($attrTei, " = '", $attrValue, "'")
257 else concat("ft:query(", $attrTei, ", '", $attrValue, "')")
242 let $result := 258 let $result :=
243 if ($attrArch != "" and $attrEcho != "") 259 if ($attrArch != "" and $attrEcho != "" and $attrTei != "")
260 then concat($attrArchRelQueryStr, " or ", $attrEchoRelQueryStr, " or ", $attrTeiRelQueryStr)
261 else if ($attrArch != "" and $attrEcho != "" and $attrTei = "")
244 then concat($attrArchRelQueryStr, " or ", $attrEchoRelQueryStr) 262 then concat($attrArchRelQueryStr, " or ", $attrEchoRelQueryStr)
245 else if ($attrArch = "" and $attrEcho != "") 263 else if ($attrArch != "" and $attrEcho = "" and $attrTei != "")
264 then concat($attrArchRelQueryStr, " or ", $attrTeiRelQueryStr)
265 else if ($attrArch = "" and $attrEcho != "" and $attrTei != "")
266 then concat($attrEchoRelQueryStr, " or ", $attrTeiRelQueryStr)
267 else if ($attrArch != "" and $attrEcho = "" and $attrTei = "")
268 then $attrArchRelQueryStr
269 else if ($attrArch = "" and $attrEcho != "" and $attrTei = "")
246 then $attrEchoRelQueryStr 270 then $attrEchoRelQueryStr
247 else if ($attrArch != "" and $attrEcho = "") 271 else if ($attrArch = "" and $attrEcho = "" and $attrTei != "")
248 then $attrArchRelQueryStr 272 then $attrTeiRelQueryStr
249 else "" 273 else ""
250 return $result 274 return $result
251 }; 275 };
252 276
253 declare function mpdl-lucene:getElemNameByAttr($docBase, $attribute) { 277 declare function mpdl-lucene:getElemNameByAttr($docBase, $attribute) {
254 let $docBaseArch := "archimedes" 278 let $docBaseArch := "archimedes"
255 let $docBaseEcho := "echo" 279 let $docBaseEcho := "echo"
280 let $docBaseTei := "tei"
256 let $result := 281 let $result :=
257 if ($docBase = $docBaseArch and $attribute = "author") 282 if ($docBase = $docBaseArch and $attribute = "author")
258 then "author" 283 then "author"
259 else if ($docBase = $docBaseEcho and $attribute = "author") 284 else if ($docBase = $docBaseEcho and $attribute = "author")
260 then "dcterms:creator" 285 then "dcterms:creator"
286 else if ($docBase = $docBaseTei and $attribute = "author")
287 then "TEI:fileDesc/TEI:titleStmt/TEI:author"
261 else if ($docBase = $docBaseArch and $attribute = "title") 288 else if ($docBase = $docBaseArch and $attribute = "title")
262 then "title" 289 then "title"
263 else if ($docBase = $docBaseEcho and $attribute = "title") 290 else if ($docBase = $docBaseEcho and $attribute = "title")
264 then "dcterms:title" 291 then "dcterms:title"
292 else if ($docBase = $docBaseTei and $attribute = "title")
293 then "TEI:fileDesc/TEI:titleStmt/TEI:title"
265 else if ($docBase = $docBaseArch and $attribute = "place") 294 else if ($docBase = $docBaseArch and $attribute = "place")
266 then "place" 295 then "place"
267 else if ($docBase = $docBaseEcho and $attribute = "place") 296 else if ($docBase = $docBaseEcho and $attribute = "place")
268 then "" 297 then ""
298 else if ($docBase = $docBaseTei and $attribute = "place")
299 then "TEI:fileDesc/TEI:publicationStmt/TEI:pubPlace"
269 else if ($docBase = $docBaseArch and $attribute = "date") 300 else if ($docBase = $docBaseArch and $attribute = "date")
270 then "date" 301 then "date"
271 else if ($docBase = $docBaseEcho and $attribute = "date") 302 else if ($docBase = $docBaseEcho and $attribute = "date")
272 then "dcterms:date" 303 then "dcterms:date"
304 else if ($docBase = $docBaseTei and $attribute = "date")
305 then "TEI:fileDesc/TEI:publicationStmt/TEI:date"
273 else if ($docBase = $docBaseArch and $attribute = "language") 306 else if ($docBase = $docBaseArch and $attribute = "language")
274 then "lang" 307 then "lang"
275 else if ($docBase = $docBaseEcho and $attribute = "language") 308 else if ($docBase = $docBaseEcho and $attribute = "language")
276 then "dcterms:language" 309 then "dcterms:language"
310 else if ($docBase = $docBaseTei and $attribute = "language")
311 then "TEI:profileDesc/TEI:langUsage/TEI:language/@ident"
277 else if ($docBase = $docBaseArch and $attribute = "identifier") 312 else if ($docBase = $docBaseArch and $attribute = "identifier")
278 then "locator" 313 then "locator"
279 else if ($docBase = $docBaseEcho and $attribute = "identifier") 314 else if ($docBase = $docBaseEcho and $attribute = "identifier")
280 then "dcterms:identifier" 315 then "dcterms:identifier"
316 else if ($docBase = $docBaseTei and $attribute = "identifier")
317 then "identifier"
281 else if ($docBase = $docBaseArch and $attribute = "rights") 318 else if ($docBase = $docBaseArch and $attribute = "rights")
282 then "" 319 then ""
283 else if ($docBase = $docBaseEcho and $attribute = "rights") 320 else if ($docBase = $docBaseEcho and $attribute = "rights")
284 then "dcterms:rights" 321 then "dcterms:rights"
322 else if ($docBase = $docBaseTei and $attribute = "rights")
323 then "rights"
285 else if ($docBase = $docBaseArch and $attribute = "license") 324 else if ($docBase = $docBaseArch and $attribute = "license")
286 then "" 325 then ""
287 else if ($docBase = $docBaseEcho and $attribute = "license") 326 else if ($docBase = $docBaseEcho and $attribute = "license")
288 then "dcterms:license" 327 then "dcterms:license"
328 else if ($docBase = $docBaseTei and $attribute = "license")
329 then ""
289 else if ($docBase = $docBaseArch and $attribute = "accessRights") 330 else if ($docBase = $docBaseArch and $attribute = "accessRights")
290 then "" 331 then ""
291 else if ($docBase = $docBaseEcho and $attribute = "accessRights") 332 else if ($docBase = $docBaseEcho and $attribute = "accessRights")
292 then "dcterms:accessRights" 333 then "dcterms:accessRights"
334 else if ($docBase = $docBaseTei and $attribute = "accessRights")
335 then "accessRights"
293 else if ($docBase = $docBaseArch and $attribute = "file") 336 else if ($docBase = $docBaseArch and $attribute = "file")
294 then "cvs_file" 337 then "cvs_file"
295 else if ($docBase = $docBaseEcho and $attribute = "file") 338 else if ($docBase = $docBaseEcho and $attribute = "file")
296 then "" 339 then ""
340 else if ($docBase = $docBaseTei and $attribute = "file")
341 then ""
297 else if ($docBase = $docBaseArch and $attribute = "translator") 342 else if ($docBase = $docBaseArch and $attribute = "translator")
298 then "translator" 343 then "translator"
299 else if ($docBase = $docBaseEcho and $attribute = "translator") 344 else if ($docBase = $docBaseEcho and $attribute = "translator")
300 then "" 345 then ""
346 else if ($docBase = $docBaseTei and $attribute = "translator")
347 then ""
301 else if ($docBase = $docBaseArch and $attribute = "version") 348 else if ($docBase = $docBaseArch and $attribute = "version")
302 then "cvs_version" 349 then "cvs_version"
303 else if ($docBase = $docBaseEcho and $attribute = "version") 350 else if ($docBase = $docBaseEcho and $attribute = "version")
304 then "" 351 then ""
352 else if ($docBase = $docBaseTei and $attribute = "version")
353 then ""
305 else "" 354 else ""
306 355
307 return $result 356 return $result
308 }; 357 };
309 358
310 declare function mpdl-lucene:getElementsByAttr($metadata, $docBase, $attribute) { 359 declare function mpdl-lucene:getElementsByAttr($metadata, $docBase, $attribute) {
311 let $docBaseArch := "archimedes" 360 let $docBaseArch := "archimedes"
312 let $docBaseEcho := "echo" 361 let $docBaseEcho := "echo"
362 let $docBaseTei := "tei"
313 let $result := 363 let $result :=
314 if ($docBase = $docBaseArch and $attribute = "author") 364 if ($docBase = $docBaseArch and $attribute = "author")
315 then 365 then
316 for $elem in $metadata/author 366 for $elem in $metadata/author
317 return <author>{$elem}</author> 367 return <author>{$elem}</author>
318 else if ($docBase = $docBaseEcho and $attribute = "author") 368 else if ($docBase = $docBaseEcho and $attribute = "author")
319 then 369 then
320 for $elem in $metadata/dcterms:creator 370 for $elem in $metadata/dcterms:creator
321 return <author>{$elem}</author> 371 return <author>{$elem}</author>
372 else if ($docBase = $docBaseTei and $attribute = "author")
373 then
374 for $elem in $metadata/TEI:fileDesc/TEI:titleStmt/TEI:author
375 return <author>{$elem}</author>
322 else if ($docBase = $docBaseArch and $attribute = "title") 376 else if ($docBase = $docBaseArch and $attribute = "title")
323 then 377 then
324 for $elem in $metadata/title 378 for $elem in $metadata/title
325 return <title>{$elem}</title> 379 return <title>{$elem}</title>
326 else if ($docBase = $docBaseEcho and $attribute = "title") 380 else if ($docBase = $docBaseEcho and $attribute = "title")
327 then 381 then
328 for $elem in $metadata/dcterms:title 382 for $elem in $metadata/dcterms:title
329 return <title>{$elem}</title> 383 return <title>{$elem}</title>
384 else if ($docBase = $docBaseTei and $attribute = "title")
385 then
386 for $elem in $metadata/TEI:fileDesc/TEI:titleStmt/TEI:title
387 return <title>{$elem}</title>
330 else if ($docBase = $docBaseArch and $attribute = "place") 388 else if ($docBase = $docBaseArch and $attribute = "place")
331 then 389 then
332 for $elem in $metadata/place 390 for $elem in $metadata/place
333 return <place>{$elem}</place> 391 return <place>{$elem}</place>
334 else if ($docBase = $docBaseEcho and $attribute = "place") 392 else if ($docBase = $docBaseEcho and $attribute = "place")
338 for $elem in $metadata/date 396 for $elem in $metadata/date
339 return <date>{$elem}</date> 397 return <date>{$elem}</date>
340 else if ($docBase = $docBaseEcho and $attribute = "date") 398 else if ($docBase = $docBaseEcho and $attribute = "date")
341 then 399 then
342 for $elem in $metadata/dcterms:date 400 for $elem in $metadata/dcterms:date
401 return <date>{$elem}</date>
402 else if ($docBase = $docBaseTei and $attribute = "date")
403 then
404 for $elem in $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:date
343 return <date>{$elem}</date> 405 return <date>{$elem}</date>
344 else if ($docBase = $docBaseArch and $attribute = "language") 406 else if ($docBase = $docBaseArch and $attribute = "language")
345 then $metadata/lang 407 then $metadata/lang
346 else if ($docBase = $docBaseEcho and $attribute = "language") 408 else if ($docBase = $docBaseEcho and $attribute = "language")
347 then $metadata/dcterms:language 409 then $metadata/dcterms:language
410 else if ($docBase = $docBaseTei and $attribute = "language")
411 then $metadata/TEI:profileDesc/TEI:langUsage/TEI:language/@ident
348 else if ($docBase = $docBaseArch and $attribute = "identifier") 412 else if ($docBase = $docBaseArch and $attribute = "identifier")
349 then $metadata/locator 413 then $metadata/locator
350 else if ($docBase = $docBaseEcho and $attribute = "identifier") 414 else if ($docBase = $docBaseEcho and $attribute = "identifier")
351 then $metadata/dcterms:identifier 415 then $metadata/dcterms:identifier
352 else if ($docBase = $docBaseArch and $attribute = "rights") 416 else if ($docBase = $docBaseArch and $attribute = "rights")
353 then () 417 then ()
354 else if ($docBase = $docBaseEcho and $attribute = "rights") 418 else if ($docBase = $docBaseEcho and $attribute = "rights")
355 then 419 then
356 for $elem in $metadata/dcterms:rights 420 for $elem in $metadata/dcterms:rights
357 return <rights>{$elem}</rights> 421 return <rights>{$elem}</rights>
422 else if ($docBase = $docBaseTei and $attribute = "rights")
423 then $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:availability
358 else if ($docBase = $docBaseArch and $attribute = "accessRights") 424 else if ($docBase = $docBaseArch and $attribute = "accessRights")
359 then () 425 then ()
360 else if ($docBase = $docBaseEcho and $attribute = "accessRights") 426 else if ($docBase = $docBaseEcho and $attribute = "accessRights")
361 then 427 then
362 for $elem in $metadata/dcterms:accessRights 428 for $elem in $metadata/dcterms:accessRights
363 return <rights>{$elem}</rights> 429 return <rights>{$elem}</rights>
430 else if ($docBase = $docBaseTei and $attribute = "accessRights")
431 then $metadata/TEI:fileDesc/TEI:publicationStmt/TEI:availability/@status
364 else if ($docBase = $docBaseArch and $attribute = "license") 432 else if ($docBase = $docBaseArch and $attribute = "license")
365 then () 433 then ()
366 else if ($docBase = $docBaseEcho and $attribute = "license") 434 else if ($docBase = $docBaseEcho and $attribute = "license")
367 then 435 then
368 for $elem in $metadata/dcterms:license 436 for $elem in $metadata/dcterms:license