changeset 30:bcd8076ff7ec

random selection of entries bug fixes
author dwinter
date Wed, 05 Jun 2013 17:37:09 +0200
parents 7027fbf1d141
children 0190f49bce88
files addDriToIndexMeta.py managePurls/manageIndexMetaPURLs.py restService/getPurls.py restService/images.py restService/restService.py
diffstat 5 files changed, 93 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/addDriToIndexMeta.py	Fri May 24 16:53:09 2013 +0200
+++ b/addDriToIndexMeta.py	Wed Jun 05 17:37:09 2013 +0200
@@ -7,6 +7,7 @@
 import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs 
 import re
 from lxml import etree
+import sys
 
 from os.path import join, getsize
 
@@ -14,14 +15,36 @@
 parseErrorFile = file("/tmp/addDRIParseErrors.txt","w")
 alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w")
 
+
+def correctAuthor(tree):
+    """Replace line breaks in every author field with "; ".
+
+    Each /resource/meta/bib/author element of `tree` that has text is
+    rewritten in place; elements without text are left untouched.
+    """
+    for node in tree.xpath("/resource/meta/bib/author"):
+        raw = node.text
+        if raw is None:
+            continue
+        # Only "\n" counts as a separator here.
+        node.text = "; ".join(raw.split("\n"))
+    
+       
+    
 def addPURL(fl,purl,test=False):
     try:
         tree = etree.parse(fl)
     except:
         parseErrorFile.write("PARSE ERROR:"+fl+"\n")
         return False
+    
     dris = tree.xpath("/resource/meta/dri[@type='mpiwg']")
     
+    
+    correctAuthor(tree)
+    
+    
+    
     if len(dris)==0: # erzeuge neu
         newDri = etree.Element("dri",type="mpiwg")
         newDri.text=purl
@@ -34,18 +57,23 @@
     else:
         dris[0].text=purl
         alreadyExistsFile.write("%s \n"%fl)
-        return True
+        #return True
 
     print etree.tostring(tree, pretty_print=True)
     
+    
     if not test:
         try:
+          
             os.rename(fl, fl+"_mpiwg_dri")
             out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False)
             fo = file(fl,"w")
             fo.write(out)
             fo.close
         except:
+            
+            print sys.exc_info()[0]
+            print sys.exc_info()[1]
             errorFile.write(fl+"\n")
     return True
     
@@ -73,4 +101,4 @@
                 dirs.remove(dir)
 
 if __name__ == '__main__':
-     addDriToIndexMeta("/mpiwg/online/",delpath="/mpiwg/online",test=True)
+     addDriToIndexMeta("/mpiwg/online/permanent/vlp",delpath="/mpiwg/online",test=False)
--- a/managePurls/manageIndexMetaPURLs.py	Fri May 24 16:53:09 2013 +0200
+++ b/managePurls/manageIndexMetaPURLs.py	Wed Jun 05 17:37:09 2013 +0200
@@ -225,8 +225,35 @@
                 return ERROR,None
 
 
-
-
+    def getExistingRandom(self,number):
+        """Return up to `number` distinct, randomly chosen existing PURLs.
+
+        The result is a list; it is shorter than `number` when the table has
+        fewer rows (or the sampled rows repeat a PURL), and empty when the
+        table is empty or `number` is not positive.
+        """
+        total = self.purlDB.query("select count(*) from purls")[0].count
+        if total < 1 or number < 1:
+            return []
+        random.seed()
+        ret = set()
+        # Sample distinct offsets over all rows: the number of queries is
+        # bounded and every row, including the last one, can be chosen.
+        for zuf in random.sample(xrange(total), min(number, total)):
+            qst = "select purl from purls OFFSET %s LIMIT 1" % zuf
+            ret.add(self.purlDB.query(qst)[0].purl)
+        return list(ret)
+        
+    def getLastEntries(self,number):
+        """Return the purl values of the `number` newest rows, newest first.
+
+        "Newest" is decided by the created_at column (descending order).
+        """
+        # int() rejects non-numeric input before it reaches the string-built SQL.
+        limit = int(number)
+        qst = "select purl from purls order by created_at desc limit %d" % limit
+        return [row.purl for row in self.purlDB.query(qst)]
+        
 if __name__ == '__main__':
     
     im = IndexMetaPURLManager()
--- a/restService/getPurls.py	Fri May 24 16:53:09 2013 +0200
+++ b/restService/getPurls.py	Wed Jun 05 17:37:09 2013 +0200
@@ -10,6 +10,7 @@
 import re
 import config
 from managePurls.manageIndexMetaPURLs import IndexMetaPURLManager
+import json
 
 class getPurls:
     
@@ -62,3 +63,28 @@
         
         return self.render.registeredPurlsResponse(purls)
         
+
+class randomSearch:
+    """Request handler that answers GET with three random PURLs as JSON."""
+
+    def __init__(self):
+        self.purlManager = IndexMetaPURLManager()
+
+    def GET(self):
+        """Return a JSON array built from getExistingRandom(3)."""
+        return json.dumps(self.purlManager.getExistingRandom(3))
+         
+
+class lastEntries:
+    """Request handler that answers GET with three PURLs from getLastEntries as JSON."""
+
+    def __init__(self):
+        self.purlManager = IndexMetaPURLManager()
+
+    def GET(self):
+        """Return a JSON array built from getLastEntries(3)."""
+        return json.dumps(self.purlManager.getLastEntries(3))
+         
+         
+         
+        
--- a/restService/images.py	Fri May 24 16:53:09 2013 +0200
+++ b/restService/images.py	Wed Jun 05 17:37:09 2013 +0200
@@ -36,7 +36,8 @@
 
         path=doc.get('TT_image',None)
         
-        if not isinstance(path, basestring): #TT_image was  defined as multiple , shouldn't be the case ?
+        
+        if (path is not None) and (not isinstance(path, basestring)): #TT_image was  defined as multiple , shouldn't be the case ?
             path=path[0]
         
       
--- a/restService/restService.py	Fri May 24 16:53:09 2013 +0200
+++ b/restService/restService.py	Wed Jun 05 17:37:09 2013 +0200
@@ -10,6 +10,9 @@
 from redirector import redirector 
 import logging
 from searcher import searcher
+from getPurls import randomSearch
+from getPurls import lastEntries
+
 from searchService.searchLines import searchLines
 from getPurls import getPurls
 from searchService.searchSolr import searchSolr
@@ -31,7 +34,9 @@
     '/searchSolr','searchSolr',
     '/imagePath/(.+)','imagePath',
     '/imageURL/(.+)','imageURL',
-    '/image/(.+)','image'
+    '/image/(.+)','image',
+    '/random','randomSearch',
+    '/last','lastEntries'
 )