| 1 | = Search-Service = |
| 2 | |
| 3 | Versuch mit Solr4 |
| 4 | |
| 5 | Config File (data-config.xml): |
| 6 | |
| 7 | {{{ |
| 8 | <dataConfig> <!-- Solr DataImportHandler configuration: indexes the .meta files found below baseDir. --> |
| 9 | <dataSource type="FileDataSource" encoding="UTF-8" /> |
| 10 | <document> <!-- Outer entity "f" lists files below baseDir (recursive, file names matching .*\.meta, names starting with "." excluded). It is declared rootEntity="false"; the nested entity "page" reads each listed file via ${f.fileAbsolutePath}. --> |
| 11 | <entity name="f" processor="FileListEntityProcessor" excludes="^\..*" baseDir="/Volumes/online_permanent/einstein/annalen" fileName=".*\.meta" recursive="true" rootEntity="false" dataSource="null"> |
| 12 | <entity name="page" |
| 13 | processor="de.mpiwg.itgroup.solr.transformer.ExtendedXPathEntityProcessor" |
| 14 | stream="true" |
| 15 | |
| 16 | url="${f.fileAbsolutePath}" |
| 17 | transformer="RegexTransformer,DateFormatTransformer" |
| 18 | xsl="/Users/dwinter/Documents/Projekte/MetaDataManagement/testData/indexMeta_to_field.xsl" |
| 19 | useSolrAddSchema="true" |
| 20 | > |
| 21 | <!-- No field mappings are declared here. NOTE(review): presumably useSolrAddSchema="true" makes the processor take the fields directly from the add/doc output of the XSL; confirm against the Solr DIH documentation. The baseDir and xsl paths are machine-specific and must be adapted per installation. --> |
| 22 | </entity> |
| 23 | </entity> |
| 24 | </document> |
| 25 | </dataConfig> |
| 26 | |
| 27 | }}} |
| 28 | |
| 29 | indexMeta_to_field.xsl konvertiert index.meta-Dateien in das doc-Format zum Indizieren. Alle Einträge in bib werden dazu in Felder mit dem Präfix |
| 30 | "IM_" umgewandelt. Außerdem werden alle Werte zusätzlich in das Feld "all-bib-data" gemappt. |
| 31 | |
| 32 | {{{ |
| 33 | <?xml version="1.0" encoding="UTF-8"?> |
| 34 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
| 35 | version="1.0"> |
| 36 | <xsl:template match="/"> <!-- Emits exactly one add/doc element per input document. --> |
| 37 | <add> |
| 38 | <doc> |
| 39 | <field name="archive-path"><xsl:value-of select="/resource/archive-path"/> |
| 40 | </field> |
| 41 | <xsl:for-each select="/resource/meta/bib//*"> <!-- Iterates over every descendant element of bib, at any depth. --> |
| 42 | <field><xsl:attribute name="name">IM_<xsl:value-of select="name()"/> |
| 43 | </xsl:attribute><xsl:value-of select="."/></field> |
| 44 | <field name="all-bib-data"><xsl:value-of select="."/></field> |
| 45 | <!-- Each element value is written twice: once to a field named IM_ plus the element name, and once to the catch-all field all-bib-data. --> |
| 46 | </xsl:for-each> |
| 47 | </doc> |
| 48 | </add> |
| 49 | </xsl:template> |
| 50 | </xsl:stylesheet> |
| 51 | |
| 52 | }}} |
| 53 | |
| 54 | |
| 55 | In schema.xml: |
| 56 | {{{ |
| 57 | |
| 58 | <field name="all-bib-data" type="text_general" indexed="true" stored="true" multiValued="true"/> |
| 59 | <field name="archive-path" type="string" indexed="true" stored="true" required="true" multiValued="false" /> |
| 60 | <dynamicField name="IM_*" type="text_general" indexed="true" stored="true"/> |
| 61 | <!-- The declarations above cover the field names emitted by indexMeta_to_field.xsl (all-bib-data, archive-path, IM_ plus element name). The uniqueKey below makes archive-path the document key. NOTE(review): IM_* is not declared multiValued here; verify that no bib element name occurs more than once per document. --> |
| 62 | <uniqueKey>archive-path</uniqueKey> |
| 63 | }}} |
| 64 | |
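| 65 | ExtendedXPathEntityProcessor ist eine fehlertolerantere Erweiterung von XPathEntityProcessor: Wirft nextRow() der Oberklasse eine Exception, wird der Stacktrace ausgegeben und null zurückgegeben, statt die Exception weiterzureichen. |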
| 66 | |
| 67 | {{{ |
| 68 | package de.mpiwg.itgroup.solr.transformer; |
| 69 | |
| 70 | import java.util.Map; |
| 71 | |
| 72 | import org.apache.solr.handler.dataimport.XPathEntityProcessor; |
| 73 | |
| 74 | /** |
| 75 | * Variant of {@link XPathEntityProcessor} that does not propagate exceptions raised while reading a row. |
| 76 | */ |
| 77 | public class ExtendedXPathEntityProcessor extends XPathEntityProcessor { |
| 78 | |
| 79 | /** |
| 80 | * Delegates to the parent implementation. |
| 81 | * |
| 82 | * @return the row produced by the parent, or {@code null} if the parent threw an exception |
| 83 | */ |
| 84 | @Override |
| 85 | public Map<String, Object> nextRow() { |
| 86 | try { |
| 87 | return super.nextRow(); |
| 88 | } catch (Exception e) { |
| 89 | // Deliberate best-effort: report the failure and hand back null instead of rethrowing. |
| 90 | e.printStackTrace(); return null; |
| 91 | } |
| 92 | } } |
| 93 | }}} |