Mercurial > hg > purlService
changeset 30:bcd8076ff7ec
random selection of entries
bug fixes
author | dwinter |
---|---|
date | Wed, 05 Jun 2013 17:37:09 +0200 |
parents | 7027fbf1d141 |
children | 0190f49bce88 |
files | addDriToIndexMeta.py managePurls/manageIndexMetaPURLs.py restService/getPurls.py restService/images.py restService/restService.py |
diffstat | 5 files changed, 93 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/addDriToIndexMeta.py Fri May 24 16:53:09 2013 +0200 +++ b/addDriToIndexMeta.py Wed Jun 05 17:37:09 2013 +0200 @@ -7,6 +7,7 @@ import managePurls.manageIndexMetaPURLs as manageIndexMetaPURLs import re from lxml import etree +import sys from os.path import join, getsize @@ -14,14 +15,36 @@ parseErrorFile = file("/tmp/addDRIParseErrors.txt","w") alreadyExistsFile = file("/tmp/addDRIalreadyExists.txt","w") + +def correctAuthor(tree): + """ersetzt in den autor felder "\r" durch ;""" + + + authors = tree.xpath("/resource/meta/bib/author") + for author in authors: + + if author.text is not None: + splitted =author.text.split("\n") + txt = "; ".join(splitted) + + author.text=txt + + + def addPURL(fl,purl,test=False): try: tree = etree.parse(fl) except: parseErrorFile.write("PARSE ERROR:"+fl+"\n") return False + dris = tree.xpath("/resource/meta/dri[@type='mpiwg']") + + correctAuthor(tree) + + + if len(dris)==0: # erzeuge neu newDri = etree.Element("dri",type="mpiwg") newDri.text=purl @@ -34,18 +57,23 @@ else: dris[0].text=purl alreadyExistsFile.write("%s \n"%fl) - return True + #return True print etree.tostring(tree, pretty_print=True) + if not test: try: + os.rename(fl, fl+"_mpiwg_dri") out = etree.tostring(tree, encoding="UTF-8",xml_declaration=False) fo = file(fl,"w") fo.write(out) fo.close except: + + print sys.exc_info()[0] + print sys.exc_info()[1] errorFile.write(fl+"\n") return True @@ -73,4 +101,4 @@ dirs.remove(dir) if __name__ == '__main__': - addDriToIndexMeta("/mpiwg/online/",delpath="/mpiwg/online",test=True) + addDriToIndexMeta("/mpiwg/online/permanent/vlp",delpath="/mpiwg/online",test=False)
--- a/managePurls/manageIndexMetaPURLs.py Fri May 24 16:53:09 2013 +0200 +++ b/managePurls/manageIndexMetaPURLs.py Wed Jun 05 17:37:09 2013 +0200 @@ -225,8 +225,35 @@ return ERROR,None - - + def getExistingRandom(self,number): + """gibt zufaellig existierende purls zurueck""" + + qst = "select count(*) from purls" + max = self.purlDB.query(qst)[0].count + + random.seed() + + ret=set() + + while len(ret)<number: + zuf = random.randrange(max-1) + + qst="select purl from purls OFFSET %s LIMIT 1"%zuf + purl=self.purlDB.query(qst)[0].purl + ret.add(purl) + + return [x for x in ret] + + def getLastEntries(self,number): + + qst ="select purl from purls order by created_at limit %s"%number + purls=self.purlDB.query(qst) + ret=[] + for purl in purls: + ret.append(purl.purl) + + return ret + if __name__ == '__main__': im = IndexMetaPURLManager()
--- a/restService/getPurls.py Fri May 24 16:53:09 2013 +0200 +++ b/restService/getPurls.py Wed Jun 05 17:37:09 2013 +0200 @@ -10,6 +10,7 @@ import re import config from managePurls.manageIndexMetaPURLs import IndexMetaPURLManager +import json class getPurls: @@ -62,3 +63,28 @@ return self.render.registeredPurlsResponse(purls) + +class randomSearch: + def __init__(self): + + self.purlManager = IndexMetaPURLManager() + + def GET(self): + lst = self.purlManager.getExistingRandom(3); + + return json.dumps(lst) + + +class lastEntries: + def __init__(self): + + self.purlManager = IndexMetaPURLManager() + + def GET(self): + lst = self.purlManager.getLastEntries(3) + + return json.dumps(lst) + + + +
--- a/restService/images.py Fri May 24 16:53:09 2013 +0200 +++ b/restService/images.py Wed Jun 05 17:37:09 2013 +0200 @@ -36,7 +36,8 @@ path=doc.get('TT_image',None) - if not isinstance(path, basestring): #TT_image was defined as multiple , shouldn't be the case ? + + if (path is not None) and (not isinstance(path, basestring)): #TT_image was defined as multiple , shouldn't be the case ? path=path[0]
--- a/restService/restService.py Fri May 24 16:53:09 2013 +0200 +++ b/restService/restService.py Wed Jun 05 17:37:09 2013 +0200 @@ -10,6 +10,9 @@ from redirector import redirector import logging from searcher import searcher +from getPurls import randomSearch +from getPurls import lastEntries + from searchService.searchLines import searchLines from getPurls import getPurls from searchService.searchSolr import searchSolr @@ -31,7 +34,9 @@ '/searchSolr','searchSolr', '/imagePath/(.+)','imagePath', '/imageURL/(.+)','imageURL', - '/image/(.+)','image' + '/image/(.+)','image', + '/random','randomSearch', + '/last','lastEntries' )