changeset 20:cf4503528b5e

searchsolr neue function, zunächst nur für text-url-path
author dwinter
date Tue, 29 Jan 2013 16:51:28 +0100
parents cce127a28fc9
children 90643ccc6545
files restService/restService.py restService/searcher.py searchService/searchSolr.py
diffstat 3 files changed, 120 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/restService/restService.py	Wed Nov 21 15:39:08 2012 +0100
+++ b/restService/restService.py	Tue Jan 29 16:51:28 2013 +0100
@@ -12,6 +12,8 @@
 from searcher import searcher
 from searchService.searchLines import searchLines
 from getPurls import getPurls
+from searchService.searchSolr import searchSolr
+
 import config
 
 
@@ -23,7 +25,8 @@
     '/','serviceDescription',
     '/searchLines','searchLines',
     '/searchLines/annotator/search','searchLines',
-    '/getPurls','getPurls'
+    '/getPurls','getPurls',
+    '/searchSolr','searchSolr'
 )
 
 
--- a/restService/searcher.py	Wed Nov 21 15:39:08 2012 +0100
+++ b/restService/searcher.py	Tue Jan 29 16:51:28 2013 +0100
@@ -17,25 +17,30 @@
         input = web.input()
         
         if not hasattr(input, 'q'):
-            return "usage: ?q=QUERYSTRING"
-        query = input.q
-        
-        purls=self.md.search(query)
-        
-        currentUrl = web.ctx.homepath
-        
-        if purls is None:
-            purls=[]
-        
-        ret="""<div class="results"><div class="purls_found_count">%s</div>"""%len(purls)
-        
-        for purl in purls:
-            ret+="""<div class="purls"><a href="%s">%s</a></div>"""%(currentUrl+"/purl/"+purl['purl'],purl['purl'])
+            return "usage: ?q=QUERYSTRING "
         
         
-        web.header('Content-Type', 'text/html')
-        return ret+"</div>"
+        if hasattr(input, 'q'):
         
+            query = input.q
+            
+            purls=self.md.search(query)
+            
+            currentUrl = web.ctx.homepath
+            
+            if purls is None:
+                purls=[]
+            
+            ret="""<div class="results"><div class="purls_found_count">%s</div>"""%len(purls)
+            
+            for purl in purls:
+                ret+="""<div class="purls"><a href="%s">%s</a></div>"""%(currentUrl+"/purl/"+purl['purl'],purl['purl'])
+            
+            
+            web.header('Content-Type', 'text/html')
+            return ret+"</div>"
+            
+            
          
 if __name__ == '__main__':
     pass
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchService/searchSolr.py	Tue Jan 29 16:51:28 2013 +0100
@@ -0,0 +1,95 @@
+'''
+Created on 16.11.2012
+
+@author: dwinter
+'''
+
+import solr
+import web
+import urllib
+import os.path
+import json
+import urllib2
+import logging
+
+#SOLR_SERVER="https://md.mpiwg-berlin.mpg.de/solr"
+SOLR_SERVER="http://127.0.0.1:8983/solr"
+DRI_SERVER="http://md.mpiwg-berlin.mpg.de/"
+
+class searchSolr:
+    
+    def __init__(self):
+        #logging.basicConfig(filename='/tmp/solr.log',level=logging.DEBUG)
+        self.con = solr.SolrConnection(SOLR_SERVER,debug=False)
+        
+        self.search = solr.SearchHandler(self.con,"/collection1/select")
+        
+    
+    def GET(self):
+        paras = web.input()
+        
+        if not hasattr(paras,'text-url-path'):
+            return "usage: ?text-url-path=PATH"
+          
+        if getattr(paras,'format','')== "short":
+            short=True
+        else:
+            short=False
+        
+        
+        queryString=paras.get("text-url-path")
+
+        return self.doGet(queryString,short=short)
+    
+    def doGet(self,queryString,short=False):
+        
+        queryString="""text-url-path:"%s" """%queryString
+        response = self.search(queryString)
+        
+        ret=""
+        hitId=0
+        rows=[]
+
+        ret="<results>"
+        
+        for hit in response:
+            
+            ret+="<result>"
+            
+            if short:
+                key="text-url-path"
+                r=hit.get(key)
+                ret+="""<%s>%s<%s>"""%(key,r,key) 
+            
+            else: 
+                
+                for key in hit.keys():
+                    res=hit.get(key)
+                    if  not isinstance(res, list):
+                        res=[res]
+                    
+                    
+                    
+                    for r in res:
+    
+                        ret+="""<%s>%s<%s>"""%(key,r,key) 
+                
+                #ret.append(hit.get('archive-path'))
+        
+            ret+="</result>"
+        
+        
+        
+        return ret+"</results>"
+    
+
+
+        
+
+
+if __name__ == '__main__':
+    sl = searchSolr()
+    
+    x=sl.doGet("/diverse/de/Einst_Bemer_de_1907.xml",short=True)
+    
+    print x.encode("utf-8")